/*
 *	UDP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/ipv4/udp.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
 *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
35
#include <linux/module.h>
36
#include <linux/skbuff.h>
37
#include <linux/slab.h>
38
#include <linux/uaccess.h>
Linus Torvalds's avatar
Linus Torvalds committed
39

40
#include <net/addrconf.h>
Linus Torvalds's avatar
Linus Torvalds committed
41
42
43
44
45
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/raw.h>
46
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
47
#include <net/ip6_checksum.h>
48
#include <net/ip6_tunnel.h>
Linus Torvalds's avatar
Linus Torvalds committed
49
#include <net/xfrm.h>
50
#include <net/inet_hashtables.h>
51
#include <net/inet6_hashtables.h>
52
#include <net/busy_poll.h>
53
#include <net/sock_reuseport.h>
Linus Torvalds's avatar
Linus Torvalds committed
54
55
56

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
57
#include <trace/events/skb.h>
58
#include "udp_impl.h"
Linus Torvalds's avatar
Linus Torvalds committed
59

60
61
62
63
64
65
66
67
68
69
/*
 * Tell the lookup code whether an exact device match is wanted for @skb.
 *
 * Only ever true when CONFIG_NET_L3_MASTER_DEV is enabled, the netns has
 * sysctl_udp_l3mdev_accept cleared, an skb was supplied and its IP6CB flags
 * pass ipv6_l3mdev_skb().  In every other case the answer is false.
 */
static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if defined(CONFIG_NET_L3_MASTER_DEV)
	if (net->ipv4.sysctl_udp_l3mdev_accept)
		return false;
	if (!skb)
		return false;
	return ipv6_l3mdev_skb(IP6CB(skb)->flags) ? true : false;
#else
	return false;
#endif
}

70
71
72
73
74
static u32 udp6_ehashfn(const struct net *net,
			const struct in6_addr *laddr,
			const u16 lport,
			const struct in6_addr *faddr,
			const __be16 fport)
75
{
76
77
78
79
80
81
82
83
84
85
86
87
88
	static u32 udp6_ehash_secret __read_mostly;
	static u32 udp_ipv6_hash_secret __read_mostly;

	u32 lhash, fhash;

	net_get_random_once(&udp6_ehash_secret,
			    sizeof(udp6_ehash_secret));
	net_get_random_once(&udp_ipv6_hash_secret,
			    sizeof(udp_ipv6_hash_secret));

	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);

89
	return __inet6_ehashfn(lhash, lport, fhash, fport,
90
			       udp_ipv6_hash_secret + net_hash_mix(net));
91
92
}

93
int udp_v6_get_port(struct sock *sk, unsigned short snum)
Linus Torvalds's avatar
Linus Torvalds committed
94
{
Eric Dumazet's avatar
Eric Dumazet committed
95
	unsigned int hash2_nulladdr =
96
		ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
97
	unsigned int hash2_partial =
98
		ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
Eric Dumazet's avatar
Eric Dumazet committed
99

100
	/* precompute partial secondary hash */
Eric Dumazet's avatar
Eric Dumazet committed
101
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
102
	return udp_lib_get_port(sk, snum, hash2_nulladdr);
Linus Torvalds's avatar
Linus Torvalds committed
103
104
}

105
void udp_v6_rehash(struct sock *sk)
Eric Dumazet's avatar
Eric Dumazet committed
106
{
107
	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
108
					  &sk->sk_v6_rcv_saddr,
Eric Dumazet's avatar
Eric Dumazet committed
109
110
111
112
113
					  inet_sk(sk)->inet_num);

	udp_lib_rehash(sk, new_hash);
}

114
115
116
/*
 * Rate how well @sk matches an incoming datagram.
 *
 * Returns -1 when the socket cannot receive the datagram: wrong netns, wrong
 * local port hash, not PF_INET6, receive address different from @daddr, a
 * connected peer port/address that differs from @sport/@saddr, or a bound
 * device rejected by udp_sk_bound_dev_eq().
 *
 * Otherwise returns a non-negative score; one point each is added for a
 * matching connected port, a matching connected address, passing the device
 * check, and sk_incoming_cpu being the current CPU.
 *
 * @exact_dif is accepted for the callers' benefit but is not consulted here.
 */
static int compute_score(struct sock *sk, struct net *net,
			 const struct in6_addr *saddr, __be16 sport,
			 const struct in6_addr *daddr, unsigned short hnum,
			 int dif, int sdif, bool exact_dif)
{
	int score;
	struct inet_sock *inet;
	bool dev_match;

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6)
		return -1;

	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
		return -1;

	score = 0;
	inet = inet_sk(sk);

	/* A non-zero inet_dport must equal the packet's source port. */
	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score++;
	}

	/* Likewise a non-wildcard sk_v6_daddr must equal the source address. */
	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
			return -1;
		score++;
	}

	dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
	if (!dev_match)
		return -1;
	score++;

	if (sk->sk_incoming_cpu == raw_smp_processor_id())
		score++;

	return score;
}

157
/* called with rcu_read_lock() */
158
159
static struct sock *udp6_lib_lookup2(struct net *net,
		const struct in6_addr *saddr, __be16 sport,
160
161
162
		const struct in6_addr *daddr, unsigned int hnum,
		int dif, int sdif, bool exact_dif,
		struct udp_hslot *hslot2, struct sk_buff *skb)
163
164
{
	struct sock *sk, *result;
Paolo Abeni's avatar
Paolo Abeni committed
165
	int score, badness;
166
	u32 hash = 0;
167
168
169

	result = NULL;
	badness = -1;
170
	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
171
		score = compute_score(sk, net, saddr, sport,
172
				      daddr, hnum, dif, sdif, exact_dif);
173
		if (score > badness) {
Paolo Abeni's avatar
Paolo Abeni committed
174
			if (sk->sk_reuseport) {
175
176
				hash = udp6_ehashfn(net, daddr, hnum,
						    saddr, sport);
177

178
				result = reuseport_select_sock(sk, hash, skb,
179
							sizeof(struct udphdr));
180
181
				if (result)
					return result;
182
			}
183
184
			result = sk;
			badness = score;
185
186
187
188
189
		}
	}
	return result;
}

190
/* rcu_read_lock() must be held */
191
struct sock *__udp6_lib_lookup(struct net *net,
192
193
194
195
			       const struct in6_addr *saddr, __be16 sport,
			       const struct in6_addr *daddr, __be16 dport,
			       int dif, int sdif, struct udp_table *udptable,
			       struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
196
197
{
	unsigned short hnum = ntohs(dport);
198
199
200
	unsigned int hash2, slot2;
	struct udp_hslot *hslot2;
	struct sock *result;
201
	bool exact_dif = udp6_lib_exact_dif_match(net, skb);
202

203
204
205
206
207
208
209
210
211
	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
	slot2 = hash2 & udptable->mask;
	hslot2 = &udptable->hash2[slot2];

	result = udp6_lib_lookup2(net, saddr, sport,
				  daddr, hnum, dif, sdif, exact_dif,
				  hslot2, skb);
	if (!result) {
		hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
212
		slot2 = hash2 & udptable->mask;
213

214
215
216
		hslot2 = &udptable->hash2[slot2];

		result = udp6_lib_lookup2(net, saddr, sport,
217
218
219
					  &in6addr_any, hnum, dif, sdif,
					  exact_dif, hslot2,
					  skb);
Linus Torvalds's avatar
Linus Torvalds committed
220
	}
221
222
	if (unlikely(IS_ERR(result)))
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
223
224
	return result;
}
225
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
Linus Torvalds's avatar
Linus Torvalds committed
226

227
228
/*
 * Socket lookup for a received @skb in @udptable: addresses come from the
 * skb's IPv6 header, the netns from skb->dev, and the interface indexes from
 * inet6_iif()/inet6_sdif().  Ports are supplied by the caller.
 */
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
					  __be16 sport, __be16 dport,
					  struct udp_table *udptable)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);

	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
				 &iph->daddr, dport, inet6_iif(skb),
				 inet6_sdif(skb), udptable, skb);
}

238
239
240
241
242
/*
 * Exported socket lookup for a received @skb against the global udp_table.
 * Addresses, netns and interface indexes are derived from the skb; ports are
 * supplied by the caller.
 */
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
				 __be16 sport, __be16 dport)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);

	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
				 &iph->daddr, dport, inet6_iif(skb),
				 inet6_sdif(skb), &udp_table, skb);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);

249
250
251
/* Must be called under rcu_read_lock().
 * Does increment socket refcount.
 *
 * Looks the socket up in the global udp_table with sdif 0 and no skb.  If the
 * refcount could not be raised (refcount_inc_not_zero() failed), NULL is
 * returned instead of the socket.
 */
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
			     const struct in6_addr *daddr, __be16 dport, int dif)
{
	struct sock *sk;

	sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
				dif, 0, &udp_table, NULL);
	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;
	return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
#endif
266

Paolo Abeni's avatar
Paolo Abeni committed
267
268
269
270
271
272
273
274
275
/* Length of the datagram held in @skb.
 *
 * Jumbograms are too long for the length kept in the scratch area, so for
 * them skb->len is used instead of udp_skb_len().  The IP6CB flags live in
 * the first cacheline, which keeps the jumbogram test cheap.
 */
static int udp6_skb_len(struct sk_buff *skb)
{
	if (unlikely(inet6_is_jumbogram(skb)))
		return skb->len;

	return udp_skb_len(skb);
}

Linus Torvalds's avatar
Linus Torvalds committed
276
/*
277
278
 *	This should be easy, if there is something there we
 *	return it, otherwise we block.
Linus Torvalds's avatar
Linus Torvalds committed
279
280
 */

281
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
Linus Torvalds's avatar
Linus Torvalds committed
282
283
284
285
		  int noblock, int flags, int *addr_len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
286
	struct sk_buff *skb;
287
	unsigned int ulen, copied;
288
	int peeked, peeking, off;
289
290
	int err;
	int is_udplite = IS_UDPLITE(sk);
291
	bool checksum_valid = false;
292
	struct udp_mib *mib;
293
	int is_udp4;
Linus Torvalds's avatar
Linus Torvalds committed
294
295

	if (flags & MSG_ERRQUEUE)
296
		return ipv6_recv_error(sk, msg, len, addr_len);
Linus Torvalds's avatar
Linus Torvalds committed
297

298
	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
299
		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
300

Linus Torvalds's avatar
Linus Torvalds committed
301
try_again:
302
303
	peeking = flags & MSG_PEEK;
	off = sk_peek_offset(sk, flags);
304
	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
Linus Torvalds's avatar
Linus Torvalds committed
305
	if (!skb)
306
		return err;
Linus Torvalds's avatar
Linus Torvalds committed
307

Paolo Abeni's avatar
Paolo Abeni committed
308
	ulen = udp6_skb_len(skb);
309
	copied = len;
310
311
	if (copied > ulen - off)
		copied = ulen - off;
312
	else if (copied < ulen)
313
		msg->msg_flags |= MSG_TRUNC;
Linus Torvalds's avatar
Linus Torvalds committed
314

315
	is_udp4 = (skb->protocol == htons(ETH_P_IP));
316
	mib = __UDPX_MIB(sk, is_udp4);
317

318
	/*
319
320
321
	 * If checksum is needed at all, try to do it while copying the
	 * data.  If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
322
323
	 */

324
325
	if (copied < ulen || peeking ||
	    (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
326
327
		checksum_valid = udp_skb_csum_unnecessary(skb) ||
				!__udp_lib_checksum_complete(skb);
328
		if (!checksum_valid)
Linus Torvalds's avatar
Linus Torvalds committed
329
			goto csum_copy_err;
330
331
	}

332
333
334
335
336
337
	if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
		if (udp_skb_is_linear(skb))
			err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
		else
			err = skb_copy_datagram_msg(skb, off, msg, copied);
	} else {
338
		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
Linus Torvalds's avatar
Linus Torvalds committed
339
340
341
		if (err == -EINVAL)
			goto csum_copy_err;
	}
342
	if (unlikely(err)) {
343
344
		if (!peeked) {
			atomic_inc(&sk->sk_drops);
345
			SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
346
		}
347
		kfree_skb(skb);
348
		return err;
349
	}
350
351
	if (!peeked)
		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
352

353
	sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
354
355
356

	/* Copy the address. */
	if (msg->msg_name) {
357
		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
Linus Torvalds's avatar
Linus Torvalds committed
358
		sin6->sin6_family = AF_INET6;
359
		sin6->sin6_port = udp_hdr(skb)->source;
Linus Torvalds's avatar
Linus Torvalds committed
360
361
		sin6->sin6_flowinfo = 0;

362
		if (is_udp4) {
363
364
			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
					       &sin6->sin6_addr);
365
366
			sin6->sin6_scope_id = 0;
		} else {
Alexey Dobriyan's avatar
Alexey Dobriyan committed
367
			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
368
369
			sin6->sin6_scope_id =
				ipv6_iface_scope_id(&sin6->sin6_addr,
370
						    inet6_iif(skb));
Linus Torvalds's avatar
Linus Torvalds committed
371
		}
372
		*addr_len = sizeof(*sin6);
Linus Torvalds's avatar
Linus Torvalds committed
373
	}
374

375
376
377
	if (udp_sk(sk)->gro_enabled)
		udp_cmsg_recv(msg, sk, skb);

378
379
380
	if (np->rxopt.all)
		ip6_datagram_recv_common_ctl(sk, msg, skb);

381
	if (is_udp4) {
Linus Torvalds's avatar
Linus Torvalds committed
382
		if (inet->cmsg_flags)
383
			ip_cmsg_recv_offset(msg, sk, skb,
Eric Dumazet's avatar
Eric Dumazet committed
384
					    sizeof(struct udphdr), off);
Linus Torvalds's avatar
Linus Torvalds committed
385
386
	} else {
		if (np->rxopt.all)
387
			ip6_datagram_recv_specific_ctl(sk, msg, skb);
388
	}
Linus Torvalds's avatar
Linus Torvalds committed
389

390
	err = copied;
Linus Torvalds's avatar
Linus Torvalds committed
391
	if (flags & MSG_TRUNC)
392
		err = ulen;
Linus Torvalds's avatar
Linus Torvalds committed
393

394
	skb_consume_udp(sk, skb, peeking ? -err : err);
Linus Torvalds's avatar
Linus Torvalds committed
395
396
397
	return err;

csum_copy_err:
398
399
	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
				 udp_skb_destructor)) {
400
401
		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
402
	}
403
	kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
404

405
406
	/* starting over for a new packet, but check if we need to yield */
	cond_resched();
407
	msg->msg_flags &= ~MSG_TRUNC;
Linus Torvalds's avatar
Linus Torvalds committed
408
409
410
	goto try_again;
}

411
412
413
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
Paolo Abeni's avatar
Paolo Abeni committed
414
	static_branch_inc(&udpv6_encap_needed_key);
415
416
417
}
EXPORT_SYMBOL(udpv6_encap_enable);

418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
 * through error handlers in encapsulations looking for a match.
 *
 * Every registered ip6tun_encaps[] entry that provides an err_handler is
 * tried in index order.  Returns 0 as soon as one handler returns 0, or
 * -ENOENT when none does.
 */
static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
				      struct inet6_skb_parm *opt,
				      u8 type, u8 code, int offset, u32 info)
{
	int i;

	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
		int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			       u8 type, u8 code, int offset, u32 info);
		const struct ip6_tnl_encap_ops *encap;

		/* Fetch the slot exactly once under RCU: testing the raw
		 * array entry and then re-reading it for the dereference
		 * could observe two different values.
		 */
		encap = rcu_dereference(ip6tun_encaps[i]);
		if (!encap)
			continue;
		handler = encap->err_handler;
		if (handler && !handler(skb, opt, type, code, offset, info))
			return 0;
	}

	return -ENOENT;
}

441
442
443
444
445
446
447
/* Try to match ICMP errors to UDP tunnels by looking up a socket without
 * reversing source and destination port: this will match tunnels that force the
 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
 * lwtunnels might actually break this assumption by being configured with
 * different destination ports on endpoints, in this case we won't be able to
 * trace ICMP messages back to them.
 *
 * If this doesn't match any socket, probe tunnels with arbitrary destination
 * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
 * we've sent packets to won't necessarily match the local destination port.
 *
 * Then ask the tunnel implementation to match the error against a valid
 * association.
 *
 * Return an error if we can't find a match, the socket if we need further
 * processing, zero otherwise.
 *
 * The skb's network and transport header offsets are changed for the
 * duration of the lookup and restored before returning.
 */
static struct sock *__udp6_lib_err_encap(struct net *net,
					 const struct ipv6hdr *hdr, int offset,
					 struct udphdr *uh,
					 struct udp_table *udptable,
					 struct sk_buff *skb,
					 struct inet6_skb_parm *opt,
					 u8 type, u8 code, __be32 info)
{
	int network_offset, transport_offset;
	struct sock *sk;

	network_offset = skb_network_offset(skb);
	transport_offset = skb_transport_offset(skb);

	/* Network header needs to point to the outer IPv6 header inside ICMP */
	skb_reset_network_header(skb);

	/* Transport header needs to point to the UDP header */
	skb_set_transport_header(skb, offset);

	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
			       &hdr->saddr, uh->dest,
			       inet6_iif(skb), 0, udptable, skb);
	if (sk) {
		int (*lookup)(struct sock *sk, struct sk_buff *skb);
		struct udp_sock *up = udp_sk(sk);

		/* A socket without encap_err_lookup, or one whose lookup
		 * returns non-zero, is treated as no match.
		 */
		lookup = READ_ONCE(up->encap_err_lookup);
		if (!lookup || lookup(sk, skb))
			sk = NULL;
	}

	if (!sk) {
		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
							offset, info));
	}

	skb_set_transport_header(skb, transport_offset);
	skb_set_network_header(skb, network_offset);

	return sk;
}

501
502
503
/*
 * Handle an ICMPv6 error whose payload is a UDP datagram.
 *
 * skb->data is read as the IPv6 header of the offending packet and @offset
 * locates its UDP header.  The socket is looked up in @udptable with the
 * addresses and ports reversed.  If none is found and encapsulation is
 * enabled, the tunnel error handlers are tried.
 *
 * Returns 0 when the error was consumed or ignored, or a negative errno
 * (after bumping ICMP6_MIB_INERRORS) when nothing matched.
 */
int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		   u8 type, u8 code, int offset, __be32 info,
		   struct udp_table *udptable)
{
	struct ipv6_pinfo *np;
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct in6_addr *saddr = &hdr->saddr;
	const struct in6_addr *daddr = &hdr->daddr;
	struct udphdr *uh = (struct udphdr *)(skb->data+offset);
	bool tunnel = false;
	struct sock *sk;
	int harderr;
	int err;
	struct net *net = dev_net(skb->dev);

	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
			       inet6_iif(skb), inet6_sdif(skb), udptable, skb);
	if (!sk) {
		/* No socket for error: try tunnels before discarding */
		sk = ERR_PTR(-ENOENT);
		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
						  udptable, skb,
						  opt, type, code, info);
			/* NULL means a tunnel handler consumed the error. */
			if (!sk)
				return 0;
		}

		if (IS_ERR(sk)) {
			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
					  ICMP6_MIB_INERRORS);
			return PTR_ERR(sk);
		}

		tunnel = true;
	}

	harderr = icmpv6_err_convert(type, code, &err);
	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		if (!ip6_sk_accept_pmtu(sk))
			goto out;
		ip6_sk_update_pmtu(skb, sk, info);
		if (np->pmtudisc != IPV6_PMTUDISC_DONT)
			harderr = 1;
	}
	if (type == NDISC_REDIRECT) {
		if (tunnel) {
			ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
				     sk->sk_mark, sk->sk_uid);
		} else {
			ip6_sk_redirect(skb, sk);
		}
		goto out;
	}

	/* Tunnels don't have an application socket: don't pass errors back */
	if (tunnel)
		goto out;

	/* Without IPV6_RECVERR only hard errors on connected sockets are
	 * reported; with it, the error is also queued for the application.
	 */
	if (!np->recverr) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else {
		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
	}

	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	return 0;
}

575
static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
576
577
578
{
	int rc;

579
	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
580
		sock_rps_save_rxhash(sk, skb);
581
		sk_mark_napi_id(sk, skb);
Eric Dumazet's avatar
Eric Dumazet committed
582
		sk_incoming_cpu_update(sk);
583
584
	} else {
		sk_mark_napi_id_once(sk, skb);
585
	}
586

587
	rc = __udp_enqueue_schedule_skb(sk, skb);
588
589
590
591
592
	if (rc < 0) {
		int is_udplite = IS_UDPLITE(sk);

		/* Note that an ENOMEM error is charged twice */
		if (rc == -ENOMEM)
593
			UDP6_INC_STATS(sock_net(sk),
594
					 UDP_MIB_RCVBUFERRORS, is_udplite);
595
		UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
596
597
598
		kfree_skb(skb);
		return -1;
	}
599

600
601
602
	return 0;
}

603
604
605
/* ICMPv6 error entry point for plain UDP: __udp6_lib_err() on udp_table. */
static __inline__ int udpv6_err(struct sk_buff *skb,
				struct inet6_skb_parm *opt, u8 type,
				u8 code, int offset, __be32 info)
{
	return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}

610
static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
611
{
612
	struct udp_sock *up = udp_sk(sk);
613
	int is_udplite = IS_UDPLITE(sk);
614

615
616
	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto drop;
Linus Torvalds's avatar
Linus Torvalds committed
617

618
	if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
619
620
621
622
623
624
625
626
627
628
629
630
631
632
		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);

		/*
		 * This is an encapsulation socket so pass the skb to
		 * the socket's udp_encap_rcv() hook. Otherwise, just
		 * fall through and pass this up the UDP socket.
		 * up->encap_rcv() returns the following value:
		 * =0 if skb was successfully passed to the encap
		 *    handler or was discarded by it.
		 * >0 if skb should be passed on to UDP.
		 * <0 if skb should be resubmitted as proto -N
		 */

		/* if we're overly short, let UDP handle it */
633
		encap_rcv = READ_ONCE(up->encap_rcv);
634
		if (encap_rcv) {
635
636
			int ret;

637
638
639
640
			/* Verify checksum before giving to encap */
			if (udp_lib_checksum_complete(skb))
				goto csum_error;

641
642
			ret = encap_rcv(sk, skb);
			if (ret <= 0) {
643
644
645
				__UDP_INC_STATS(sock_net(sk),
						UDP_MIB_INDATAGRAMS,
						is_udplite);
646
647
648
649
650
651
652
				return -ret;
			}
		}

		/* FALLTHROUGH -- it's a UDP Packet */
	}

653
654
655
	/*
	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
	 */
656
	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
657
658

		if (up->pcrlen == 0) {          /* full coverage was set  */
659
660
			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
					    UDP_SKB_CB(skb)->cscov, skb->len);
661
662
663
			goto drop;
		}
		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
664
665
			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
666
667
			goto drop;
		}
Linus Torvalds's avatar
Linus Torvalds committed
668
669
	}

670
	prefetch(&sk->sk_rmem_alloc);
671
672
673
674
	if (rcu_access_pointer(sk->sk_filter) &&
	    udp_lib_checksum_complete(skb))
		goto csum_error;

675
	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
676
		goto drop;
677

678
	udp_csum_pull_header(skb);
679

680
	skb_dst_drop(skb);
681

682
	return __udpv6_queue_rcv_skb(sk, skb);
683

684
csum_error:
685
	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
686
drop:
687
	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
688
	atomic_inc(&sk->sk_drops);
689
690
	kfree_skb(skb);
	return -1;
Linus Torvalds's avatar
Linus Torvalds committed
691
692
}

693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
/*
 * Queue @skb on @sk, segmenting it first when udp_unexpected_gso() says so.
 *
 * The common case goes straight to udpv6_queue_rcv_one_skb() and returns its
 * result.  Otherwise the skb is pushed back to its MAC header, split with
 * udp_rcv_segment(), and each segment is delivered individually; a segment
 * whose delivery returns a positive value is resubmitted through
 * ip6_protocol_deliver_rcu().  The segmented path always returns 0.
 */
static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *seg, *following;
	int rc;

	if (likely(!udp_unexpected_gso(sk, skb)))
		return udpv6_queue_rcv_one_skb(sk, skb);

	__skb_push(skb, -skb_mac_offset(skb));
	seg = udp_rcv_segment(sk, skb, false);
	while (seg) {
		/* Remember the successor before the segment is handed off. */
		following = seg->next;
		__skb_pull(seg, skb_transport_offset(seg));

		rc = udpv6_queue_rcv_one_skb(sk, seg);
		if (rc > 0)
			ip6_protocol_deliver_rcu(dev_net(seg->dev), seg, rc,
						 true);
		seg = following;
	}
	return 0;
}

715
716
717
/*
 * Decide whether @sk should receive a multicast datagram.
 *
 * The socket must be in @net, hashed on port @hnum, of family PF_INET6 and
 * pass the bound-device check.  A non-zero connected port, a non-wildcard
 * connected address and a non-wildcard bound receive address must each equal
 * @rmt_port, @rmt_addr and @loc_addr respectively.  Finally inet6_mc_check()
 * must accept the (@loc_addr, @rmt_addr) pair.
 *
 * @loc_port is part of the signature but is not examined here.
 */
static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
				   __be16 loc_port, const struct in6_addr *loc_addr,
				   __be16 rmt_port, const struct in6_addr *rmt_addr,
				   int dif, int sdif, unsigned short hnum)
{
	struct inet_sock *inet = inet_sk(sk);

	if (!net_eq(sock_net(sk), net))
		return false;

	if (udp_sk(sk)->udp_port_hash != hnum ||
	    sk->sk_family != PF_INET6 ||
	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
	    !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) ||
	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
		return false;
	if (!inet6_mc_check(sk, loc_addr, rmt_addr))
		return false;
	return true;
}

739
740
741
742
743
static void udp6_csum_zero_error(struct sk_buff *skb)
{
	/* RFC 2460 section 8.1 says that we SHOULD log
	 * this error. Well, it is reasonable.
	 */
744
745
746
	net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
			    &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
			    &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
747
748
}

Linus Torvalds's avatar
Linus Torvalds committed
749
750
751
752
/*
 * Note: called only from the BH handler context,
 * so we don't need to lock the hashes.
 */
753
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
754
		const struct in6_addr *saddr, const struct in6_addr *daddr,
755
		struct udp_table *udptable, int proto)
Linus Torvalds's avatar
Linus Torvalds committed
756
{
757
	struct sock *sk, *first = NULL;
758
	const struct udphdr *uh = udp_hdr(skb);
759
760
	unsigned short hnum = ntohs(uh->dest);
	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
761
	unsigned int offset = offsetof(typeof(*sk), sk_node);
762
	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
763
	int dif = inet6_iif(skb);
764
	int sdif = inet6_sdif(skb);
765
766
	struct hlist_node *node;
	struct sk_buff *nskb;
767
768

	if (use_hash2) {
769
		hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
770
			    udptable->mask;
771
		hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
772
start_lookup:
773
		hslot = &udptable->hash2[hash2];
774
775
		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
	}
Linus Torvalds's avatar
Linus Torvalds committed
776

777
778
	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
		if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
779
780
					    uh->source, saddr, dif, sdif,
					    hnum))
781
782
783
784
785
786
787
788
789
790
791
792
793
			continue;
		/* If zero checksum and no_check is not on for
		 * the socket then skip it.
		 */
		if (!uh->check && !udp_sk(sk)->no_check6_rx)
			continue;
		if (!first) {
			first = sk;
			continue;
		}
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!nskb)) {
			atomic_inc(&sk->sk_drops);
794
795
796
797
			__UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
					 IS_UDPLITE(sk));
			__UDP6_INC_STATS(net, UDP_MIB_INERRORS,
					 IS_UDPLITE(sk));
798
			continue;
Hideo Aoki's avatar
Hideo Aoki committed
799
		}
800

801
802
803
		if (udpv6_queue_rcv_skb(sk, nskb) > 0)
			consume_skb(nskb);
	}
804

805
806
807
808
809
810
	/* Also lookup *:port if we are using hash2 and haven't done so yet. */
	if (use_hash2 && hash2 != hash2_any) {
		hash2 = hash2_any;
		goto start_lookup;
	}

811
812
813
	if (first) {
		if (udpv6_queue_rcv_skb(first, skb) > 0)
			consume_skb(skb);
814
	} else {
815
		kfree_skb(skb);
816
817
		__UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
				 proto == IPPROTO_UDPLITE);
818
	}
819
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
820
821
}

822
823
824
825
826
827
828
829
830
/*
 * Offer @dst to udp_sk_rx_dst_set() for @sk; when that call returns true,
 * also store the route's cookie (rt6_get_cookie()) in the socket's
 * rx_dst_cookie.
 */
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
	const struct rt6_info *rt6;

	if (!udp_sk_rx_dst_set(sk, dst))
		return;

	rt6 = (const struct rt6_info *)dst;
	inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt6);
}

831
832
833
834
835
836
837
838
839
840
841
842
843
844
/* wrapper for udpv6_queue_rcv_skb taking care of csum conversion and
 * return code conversion for ip layer consumption
 *
 * The checksum conversion is attempted only when inet_get_convert_csum() is
 * set for the socket, the UDP checksum field is non-zero and the socket is
 * not UDP-Lite.  Positive results from the queueing step are passed through;
 * anything else is reported as 0.
 */
static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
				struct udphdr *uh)
{
	int ret;

	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
		skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					 ip6_compute_pseudo);

	ret = udpv6_queue_rcv_skb(sk, skb);

	/* a return value > 0 means to resubmit the input */
	if (ret > 0)
		return ret;
	return 0;
}

851
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
852
		   int proto)
Linus Torvalds's avatar
Linus Torvalds committed
853
{
854
	const struct in6_addr *saddr, *daddr;
855
	struct net *net = dev_net(skb->dev);
856
	struct udphdr *uh;
857
	struct sock *sk;
Linus Torvalds's avatar
Linus Torvalds committed
858
859
860
	u32 ulen = 0;

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
861
		goto discard;
Linus Torvalds's avatar
Linus Torvalds committed
862

863
864
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
865
	uh = udp_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
866
867

	ulen = ntohs(uh->len);
868
869
	if (ulen > skb->len)
		goto short_packet;
Linus Torvalds's avatar
Linus Torvalds committed
870

871
872
	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */
Linus Torvalds's avatar
Linus Torvalds committed
873

874
875
876
		/* Check for jumbo payload */
		if (ulen == 0)
			ulen = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
877

878
879
		if (ulen < sizeof(*uh))
			goto short_packet;
Linus Torvalds's avatar
Linus Torvalds committed
880

881
882
883
		if (ulen < skb->len) {
			if (pskb_trim_rcsum(skb, ulen))
				goto short_packet;
884
885
			saddr = &ipv6_hdr(skb)->saddr;
			daddr = &ipv6_hdr(skb)->daddr;
886
			uh = udp_hdr(skb);
887
888
		}
	}
Linus Torvalds's avatar
Linus Torvalds committed
889

890
	if (udp6_csum_init(skb, uh, proto))
891
		goto csum_error;
892

893
894
895
896
897
898
899
	/* Check if the socket is already available, e.g. due to early demux */
	sk = skb_steal_sock(skb);
	if (sk) {
		struct dst_entry *dst = skb_dst(skb);
		int ret;

		if (unlikely(sk->sk_rx_dst != dst))
900
			udp6_sk_rx_dst_set(sk, dst);
901

902
903
904
905
		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
			sock_put(sk);
			goto report_csum_error;
		}
906

907
908
909
		ret = udp6_unicast_rcv_skb(sk, skb, uh);
		sock_put(sk);
		return ret;
910
911
	}

912
913
	/*
	 *	Multicast receive code
Linus Torvalds's avatar
Linus Torvalds committed
914
	 */
915
	if (ipv6_addr_is_multicast(daddr))
916
		return __udp6_lib_mcast_deliver(net, skb,
917
				saddr, daddr, udptable, proto);
Linus Torvalds's avatar
Linus Torvalds committed
918
919

	/* Unicast */
920
	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
921
	if (sk) {
922
923
924
		if (!uh->check && !udp_sk(sk)->no_check6_rx)
			goto report_csum_error;
		return udp6_unicast_rcv_skb(sk, skb, uh);
Linus Torvalds's avatar
Linus Torvalds committed
925
	}
926

927
928
	if (!uh->check)
		goto report_csum_error;
929

930
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
931
		goto discard;
932
933

	if (udp_lib_checksum_complete(skb))
934
		goto csum_error;
935

936
	__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
937
938
939
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);

	kfree_skb(skb);
Stephen Hemminger's avatar
Stephen Hemminger committed
940
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
941

942
short_packet: