ip6_tunnel.c 56.8 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
 *	IPv6 tunneling device
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
 *
 *      Based on:
 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
 *
 *      RFC 2473
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

21
22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

Linus Torvalds's avatar
Linus Torvalds committed
23
#include <linux/module.h>
24
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
25
26
27
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
28
#include <linux/icmp.h>
Linus Torvalds's avatar
Linus Torvalds committed
29
30
31
32
33
34
35
36
37
38
39
40
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
41
#include <linux/slab.h>
42
#include <linux/hash.h>
43
#include <linux/etherdevice.h>
Linus Torvalds's avatar
Linus Torvalds committed
44

45
#include <linux/uaccess.h>
Arun Sharma's avatar
Arun Sharma committed
46
#include <linux/atomic.h>
Linus Torvalds's avatar
Linus Torvalds committed
47

48
#include <net/icmp.h>
Linus Torvalds's avatar
Linus Torvalds committed
49
#include <net/ip.h>
50
#include <net/ip_tunnels.h>
Linus Torvalds's avatar
Linus Torvalds committed
51
52
53
54
55
56
57
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
58
59
#include <net/net_namespace.h>
#include <net/netns/generic.h>
60
#include <net/dst_metadata.h>
Linus Torvalds's avatar
Linus Torvalds committed
61
62

MODULE_AUTHOR("Ville Nuorvala");
63
MODULE_DESCRIPTION("IPv6 tunneling device");
Linus Torvalds's avatar
Linus Torvalds committed
64
MODULE_LICENSE("GPL");
65
MODULE_ALIAS_RTNL_LINK("ip6tnl");
66
MODULE_ALIAS_NETDEV("ip6tnl0");
Linus Torvalds's avatar
Linus Torvalds committed
67

68
69
#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
Linus Torvalds's avatar
Linus Torvalds committed
70

71
72
73
74
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

75
76
77
78
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);

79
	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
80
}
Linus Torvalds's avatar
Linus Torvalds committed
81

Eric Dumazet's avatar
Eric Dumazet committed
82
static int ip6_tnl_dev_init(struct net_device *dev);
83
static void ip6_tnl_dev_setup(struct net_device *dev);
84
static struct rtnl_link_ops ip6_link_ops __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
85

86
static unsigned int ip6_tnl_net_id __read_mostly;
87
struct ip6_tnl_net {
88
89
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
90
	/* lists for storing tunnels in use */
91
	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
Eric Dumazet's avatar
Eric Dumazet committed
92
93
	struct ip6_tnl __rcu *tnls_wc[1];
	struct ip6_tnl __rcu **tnls[2];
94
	struct ip6_tnl __rcu *collect_md_tun;
95
96
};

Eric Dumazet's avatar
Eric Dumazet committed
97
98
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
99
	struct pcpu_sw_netstats tmp, sum = { 0 };
Eric Dumazet's avatar
Eric Dumazet committed
100
101
102
	int i;

	for_each_possible_cpu(i) {
103
		unsigned int start;
104
105
		const struct pcpu_sw_netstats *tstats =
						   per_cpu_ptr(dev->tstats, i);
Eric Dumazet's avatar
Eric Dumazet committed
106

107
		do {
108
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
109
110
111
112
			tmp.rx_packets = tstats->rx_packets;
			tmp.rx_bytes = tstats->rx_bytes;
			tmp.tx_packets = tstats->tx_packets;
			tmp.tx_bytes =  tstats->tx_bytes;
113
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
114
115
116
117
118

		sum.rx_packets += tmp.rx_packets;
		sum.rx_bytes   += tmp.rx_bytes;
		sum.tx_packets += tmp.tx_packets;
		sum.tx_bytes   += tmp.tx_bytes;
Eric Dumazet's avatar
Eric Dumazet committed
119
120
121
122
123
124
125
126
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

Linus Torvalds's avatar
Linus Torvalds committed
127
/**
128
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
129
130
 *   @remote: the address of the tunnel exit-point
 *   @local: the address of the tunnel entry-point
Linus Torvalds's avatar
Linus Torvalds committed
131
 *
132
 * Return:
Linus Torvalds's avatar
Linus Torvalds committed
133
 *   tunnel matching given end-points if found,
134
 *   else fallback tunnel if its device is up,
Linus Torvalds's avatar
Linus Torvalds committed
135
136
137
 *   else %NULL
 **/

138
139
140
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

Linus Torvalds's avatar
Linus Torvalds committed
141
static struct ip6_tnl *
142
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
Linus Torvalds's avatar
Linus Torvalds committed
143
{
144
	unsigned int hash = HASH(remote, local);
Linus Torvalds's avatar
Linus Torvalds committed
145
	struct ip6_tnl *t;
146
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
147
	struct in6_addr any;
Linus Torvalds's avatar
Linus Torvalds committed
148

149
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
Linus Torvalds's avatar
Linus Torvalds committed
150
151
152
153
154
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}
155
156
157
158
159

	memset(&any, 0, sizeof(any));
	hash = HASH(&any, local);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
160
		    ipv6_addr_any(&t->parms.raddr) &&
161
162
163
164
165
166
167
		    (t->dev->flags & IFF_UP))
			return t;
	}

	hash = HASH(remote, &any);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(remote, &t->parms.raddr) &&
168
		    ipv6_addr_any(&t->parms.laddr) &&
169
170
171
172
		    (t->dev->flags & IFF_UP))
			return t;
	}

173
	t = rcu_dereference(ip6n->collect_md_tun);
174
	if (t && t->dev->flags & IFF_UP)
175
176
		return t;

177
178
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
Linus Torvalds's avatar
Linus Torvalds committed
179
180
181
182
183
184
		return t;

	return NULL;
}

/**
185
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
186
 *   @p: parameters containing tunnel end-points
Linus Torvalds's avatar
Linus Torvalds committed
187
188
 *
 * Description:
189
 *   ip6_tnl_bucket() returns the head of the list matching the
Linus Torvalds's avatar
Linus Torvalds committed
190
191
 *   &struct in6_addr entries laddr and raddr in @p.
 *
192
 * Return: head of IPv6 tunnel list
Linus Torvalds's avatar
Linus Torvalds committed
193
194
 **/

Eric Dumazet's avatar
Eric Dumazet committed
195
static struct ip6_tnl __rcu **
xeb@mail.ru's avatar
xeb@mail.ru committed
196
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
Linus Torvalds's avatar
Linus Torvalds committed
197
{
198
199
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
200
	unsigned int h = 0;
Linus Torvalds's avatar
Linus Torvalds committed
201
202
203
204
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
205
		h = HASH(remote, local);
Linus Torvalds's avatar
Linus Torvalds committed
206
	}
207
	return &ip6n->tnls[prio][h];
Linus Torvalds's avatar
Linus Torvalds committed
208
209
210
}

/**
211
 * ip6_tnl_link - add tunnel to hash table
Linus Torvalds's avatar
Linus Torvalds committed
212
213
214
215
 *   @t: tunnel to be added
 **/

static void
216
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
217
{
Eric Dumazet's avatar
Eric Dumazet committed
218
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
Linus Torvalds's avatar
Linus Torvalds committed
219

220
221
	if (t->parms.collect_md)
		rcu_assign_pointer(ip6n->collect_md_tun, t);
222
223
	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
Linus Torvalds's avatar
Linus Torvalds committed
224
225
226
}

/**
227
 * ip6_tnl_unlink - remove tunnel from hash table
Linus Torvalds's avatar
Linus Torvalds committed
228
229
230
231
 *   @t: tunnel to be removed
 **/

static void
232
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
233
{
Eric Dumazet's avatar
Eric Dumazet committed
234
235
236
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

237
238
239
	if (t->parms.collect_md)
		rcu_assign_pointer(ip6n->collect_md_tun, NULL);

Eric Dumazet's avatar
Eric Dumazet committed
240
241
242
243
	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
244
			rcu_assign_pointer(*tp, t->next);
Linus Torvalds's avatar
Linus Torvalds committed
245
246
247
248
249
			break;
		}
	}
}

Eric Dumazet's avatar
Eric Dumazet committed
250
251
static void ip6_dev_free(struct net_device *dev)
{
252
253
	struct ip6_tnl *t = netdev_priv(dev);

254
	gro_cells_destroy(&t->gro_cells);
255
	dst_cache_destroy(&t->dst_cache);
Eric Dumazet's avatar
Eric Dumazet committed
256
257
258
	free_percpu(dev->tstats);
}

259
260
261
262
263
264
265
266
267
static int ip6_tnl_create2(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	t = netdev_priv(dev);

268
	dev->rtnl_link_ops = &ip6_link_ops;
269
270
271
272
273
274
275
276
277
278
279
280
281
282
	err = register_netdevice(dev);
	if (err < 0)
		goto out;

	strcpy(t->parms.name, dev->name);

	dev_hold(dev);
	ip6_tnl_link(ip6n, t);
	return 0;

out:
	return err;
}

Linus Torvalds's avatar
Linus Torvalds committed
283
/**
284
 * ip6_tnl_create - create a new tunnel
Linus Torvalds's avatar
Linus Torvalds committed
285
286
287
288
289
 *   @p: tunnel parameters
 *   @pt: pointer to new tunnel
 *
 * Description:
 *   Create tunnel matching given parameters.
290
291
 *
 * Return:
292
 *   created tunnel or error pointer
Linus Torvalds's avatar
Linus Torvalds committed
293
294
 **/

xeb@mail.ru's avatar
xeb@mail.ru committed
295
static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
Linus Torvalds's avatar
Linus Torvalds committed
296
297
298
299
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
300
	int err = -E2BIG;
Linus Torvalds's avatar
Linus Torvalds committed
301

302
303
304
	if (p->name[0]) {
		if (!dev_valid_name(p->name))
			goto failed;
Linus Torvalds's avatar
Linus Torvalds committed
305
		strlcpy(name, p->name, IFNAMSIZ);
306
	} else {
307
		sprintf(name, "ip6tnl%%d");
308
309
	}
	err = -ENOMEM;
310
311
	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
			   ip6_tnl_dev_setup);
312
	if (!dev)
313
		goto failed;
Linus Torvalds's avatar
Linus Torvalds committed
314

315
316
	dev_net_set(dev, net);

317
	t = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
318
	t->parms = *p;
Nicolas Dichtel's avatar
Nicolas Dichtel committed
319
	t->net = dev_net(dev);
320
	err = ip6_tnl_create2(dev);
Eric Dumazet's avatar
Eric Dumazet committed
321
322
	if (err < 0)
		goto failed_free;
Linus Torvalds's avatar
Linus Torvalds committed
323

324
	return t;
325
326

failed_free:
327
	free_netdev(dev);
328
failed:
329
	return ERR_PTR(err);
Linus Torvalds's avatar
Linus Torvalds committed
330
331
332
}

/**
333
 * ip6_tnl_locate - find or create tunnel matching given parameters
334
 *   @p: tunnel parameters
Linus Torvalds's avatar
Linus Torvalds committed
335
336
337
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
338
 *   ip6_tnl_locate() first tries to locate an existing tunnel
Linus Torvalds's avatar
Linus Torvalds committed
339
340
341
342
 *   based on @parms. If this is unsuccessful, but @create is set a new
 *   tunnel device is created and registered for use.
 *
 * Return:
343
 *   matching tunnel or error pointer
Linus Torvalds's avatar
Linus Torvalds committed
344
345
 **/

346
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
xeb@mail.ru's avatar
xeb@mail.ru committed
347
		struct __ip6_tnl_parm *p, int create)
Linus Torvalds's avatar
Linus Torvalds committed
348
{
349
350
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
Eric Dumazet's avatar
Eric Dumazet committed
351
	struct ip6_tnl __rcu **tp;
Linus Torvalds's avatar
Linus Torvalds committed
352
	struct ip6_tnl *t;
353
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
354

Eric Dumazet's avatar
Eric Dumazet committed
355
356
357
	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
Linus Torvalds's avatar
Linus Torvalds committed
358
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
359
360
		    ipv6_addr_equal(remote, &t->parms.raddr)) {
			if (create)
361
				return ERR_PTR(-EEXIST);
362

363
			return t;
364
		}
Linus Torvalds's avatar
Linus Torvalds committed
365
366
	}
	if (!create)
367
		return ERR_PTR(-ENODEV);
368
	return ip6_tnl_create(net, p);
Linus Torvalds's avatar
Linus Torvalds committed
369
370
371
}

/**
372
 * ip6_tnl_dev_uninit - tunnel device uninitializer
Linus Torvalds's avatar
Linus Torvalds committed
373
 *   @dev: the device to be destroyed
374
 *
Linus Torvalds's avatar
Linus Torvalds committed
375
 * Description:
376
 *   ip6_tnl_dev_uninit() removes tunnel from its list
Linus Torvalds's avatar
Linus Torvalds committed
377
378
379
 **/

static void
380
ip6_tnl_dev_uninit(struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
381
{
382
	struct ip6_tnl *t = netdev_priv(dev);
Nicolas Dichtel's avatar
Nicolas Dichtel committed
383
	struct net *net = t->net;
384
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed
385

Eric Dumazet's avatar
Eric Dumazet committed
386
	if (dev == ip6n->fb_tnl_dev)
387
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
Eric Dumazet's avatar
Eric Dumazet committed
388
	else
389
		ip6_tnl_unlink(ip6n, t);
390
	dst_cache_reset(&t->dst_cache);
Linus Torvalds's avatar
Linus Torvalds committed
391
392
393
394
395
396
397
	dev_put(dev);
}

/**
 * parse_tvl_tnl_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *
398
399
 * Return:
 *   0 if none was found,
Linus Torvalds's avatar
Linus Torvalds committed
400
401
402
 *   else index to encapsulation limit
 **/

xeb@mail.ru's avatar
xeb@mail.ru committed
403
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
Linus Torvalds's avatar
Linus Torvalds committed
404
{
405
406
407
408
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
	unsigned int nhoff = raw - skb->data;
	unsigned int off = nhoff + sizeof(*ipv6h);
	u8 next, nexthdr = ipv6h->nexthdr;
Linus Torvalds's avatar
Linus Torvalds committed
409
410
411

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		struct ipv6_opt_hdr *hdr;
412
413
414
		u16 optlen;

		if (!pskb_may_pull(skb, off + sizeof(*hdr)))
Linus Torvalds's avatar
Linus Torvalds committed
415
416
			break;

417
		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
Linus Torvalds's avatar
Linus Torvalds committed
418
419
420
421
422
423
424
425
426
427
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
428
429
430
431
		/* cache hdr->nexthdr, since pskb_may_pull() might
		 * invalidate hdr
		 */
		next = hdr->nexthdr;
Linus Torvalds's avatar
Linus Torvalds committed
432
		if (nexthdr == NEXTHDR_DEST) {
433
434
435
436
437
438
			u16 i = 2;

			/* Remember : hdr is no longer valid at this point. */
			if (!pskb_may_pull(skb, off + optlen))
				break;

Linus Torvalds's avatar
Linus Torvalds committed
439
440
441
442
			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
443
				if (i + sizeof(*tel) > optlen)
Linus Torvalds's avatar
Linus Torvalds committed
444
445
					break;

446
				tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
Linus Torvalds's avatar
Linus Torvalds committed
447
448
449
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
450
					return i + off - nhoff;
Linus Torvalds's avatar
Linus Torvalds committed
451
452
453
454
455
456
457
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
458
		nexthdr = next;
Linus Torvalds's avatar
Linus Torvalds committed
459
460
461
462
		off += optlen;
	}
	return 0;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
463
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
Linus Torvalds's avatar
Linus Torvalds committed
464
465

/**
466
 * ip6_tnl_err - tunnel error handler
Linus Torvalds's avatar
Linus Torvalds committed
467
468
 *
 * Description:
469
 *   ip6_tnl_err() should handle errors in the tunnel according
Linus Torvalds's avatar
Linus Torvalds committed
470
471
472
 *   to the specifications in RFC 2473.
 **/

Herbert Xu's avatar
Herbert Xu committed
473
static int
474
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
475
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
Linus Torvalds's avatar
Linus Torvalds committed
476
{
477
478
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
	struct net *net = dev_net(skb->dev);
479
480
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
Linus Torvalds's avatar
Linus Torvalds committed
481
	__u32 rel_info = 0;
482
	struct ip6_tnl *t;
Herbert Xu's avatar
Herbert Xu committed
483
	int err = -ENOENT;
484
485
486
	int rel_msg = 0;
	u8 tproto;
	__u16 len;
Linus Torvalds's avatar
Linus Torvalds committed
487

488
489
	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
Linus Torvalds's avatar
Linus Torvalds committed
490
491
	   processing of the error. */

492
	rcu_read_lock();
493
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
494
	if (!t)
Linus Torvalds's avatar
Linus Torvalds committed
495
496
		goto out;

497
	tproto = READ_ONCE(t->parms.proto);
498
	if (tproto != ipproto && tproto != 0)
499
500
		goto out;

Herbert Xu's avatar
Herbert Xu committed
501
502
	err = 0;

503
	switch (*type) {
Linus Torvalds's avatar
Linus Torvalds committed
504
		struct ipv6_tlv_tnl_enc_lim *tel;
505
		__u32 mtu, teli;
Linus Torvalds's avatar
Linus Torvalds committed
506
	case ICMPV6_DEST_UNREACH:
507
508
		net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
				    t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
509
510
511
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
512
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
513
514
			net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					    t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
515
516
517
518
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
519
		teli = 0;
520
		if ((*code) == ICMPV6_HDR_FIELD)
xeb@mail.ru's avatar
xeb@mail.ru committed
521
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
Linus Torvalds's avatar
Linus Torvalds committed
522

Al Viro's avatar
Al Viro committed
523
		if (teli && teli == *info - 2) {
Linus Torvalds's avatar
Linus Torvalds committed
524
525
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
526
527
				net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						    t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
528
529
				rel_msg = 1;
			}
530
		} else {
531
532
			net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					    t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
533
534
535
		}
		break;
	case ICMPV6_PKT_TOOBIG:
536
537
		ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
				sock_net_uid(net, NULL));
Al Viro's avatar
Al Viro committed
538
		mtu = *info - offset;
Linus Torvalds's avatar
Linus Torvalds committed
539
540
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
541
542
		len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
		if (len > mtu) {
Linus Torvalds's avatar
Linus Torvalds committed
543
544
545
546
547
548
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
549
550
551
552
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
		break;
Linus Torvalds's avatar
Linus Torvalds committed
553
	}
554
555
556
557
558
559
560

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
561
	rcu_read_unlock();
562
563
564
	return err;
}

565
566
/*
 * ip4ip6_err - ICMPv6 error handler for IPv4-in-IPv6 tunnels
 *
 * Lets ip6_tnl_err() decide whether the error has to be relayed. If so,
 * the ICMPv6 error is translated to its ICMPv4 counterpart (address
 * unreachable -> host unreachable, packet too big -> fragmentation
 * needed) and sent towards the originator of the inner IPv4 packet,
 * using a clone of @skb that is pulled to the inner header at @offset.
 *
 * Returns the negative error from ip6_tnl_err(), otherwise 0.
 */
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	__u32 rel_info = ntohl(info);
	const struct iphdr *eiph;
	struct sk_buff *skb2;
	int err, rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	struct rtable *rt;
	struct flowi4 fl4;
	bool via_local;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg == 0)
		return 0;

	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);

	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	eiph = ip_hdr(skb2);

	/* Try to guess incoming interface */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
				   0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	if (IS_ERR(rt))
		goto out;

	skb2->dev = rt->dst.dev;
	/* sample the flag first so that rt is not dereferenced any more
	 * once our reference has been dropped
	 */
	via_local = rt->rt_flags & RTCF_LOCAL;
	ip_rt_put(rt);

	/* route "incoming" packet */
	if (via_local) {
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr, 0, 0,
					   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
		if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
			if (!IS_ERR(rt))
				ip_rt_put(rt);
			goto out;
		}
		skb_dst_set(skb2, &rt->dst);
	} else {
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb_dst(skb2)))
			goto out;

		skb_dst_update_pmtu(skb2, rel_info);
	}

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	kfree_skb(skb2);
	return 0;
}

658
659
/*
 * ip6ip6_err - ICMPv6 error handler for IPv6-in-IPv6 tunnels
 *
 * Lets ip6_tnl_err() decide whether the error has to be relayed. If so,
 * an ICMPv6 error with the relayed type, code and info is sent for a
 * clone of @skb that is pulled to the inner IPv6 header at @offset.
 *
 * Returns the negative error from ip6_tnl_err(), otherwise 0.
 */
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	__u32 rel_info = ntohl(info);
	u8 rel_type = type;
	u8 rel_code = code;
	struct sk_buff *inner;
	struct rt6_info *rt;
	int rel_msg = 0;
	int err;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	/* nothing to relay, or the inner header is not available */
	if (!rel_msg || !pskb_may_pull(skb, offset + sizeof(struct ipv6hdr)))
		return 0;

	inner = skb_clone(skb, GFP_ATOMIC);
	if (!inner)
		return 0;

	skb_dst_drop(inner);
	skb_pull(inner, offset);
	skb_reset_network_header(inner);

	/* Try to guess incoming interface */
	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(inner)->saddr,
			NULL, 0, inner, 0);
	if (rt && rt->dst.dev)
		inner->dev = rt->dst.dev;

	icmpv6_send(inner, rel_type, rel_code, rel_info);

	ip6_rt_put(rt);
	kfree_skb(inner);

	return 0;
}

700
701
702
static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
703
704
705
706
{
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
707
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
708

709
	return IP6_ECN_decapsulate(ipv6h, skb);
710
711
}

712
713
714
static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
715
{
716
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
717
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
Linus Torvalds's avatar
Linus Torvalds committed
718

719
	return IP6_ECN_decapsulate(ipv6h, skb);
Linus Torvalds's avatar
Linus Torvalds committed
720
}
721

xeb@mail.ru's avatar
xeb@mail.ru committed
722
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
723
724
725
			     const struct in6_addr *laddr,
			     const struct in6_addr *raddr)
{
xeb@mail.ru's avatar
xeb@mail.ru committed
726
	struct __ip6_tnl_parm *p = &t->parms;
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
744
EXPORT_SYMBOL(ip6_tnl_get_cap);
745

Eric Dumazet's avatar
Eric Dumazet committed
746
/* called with rcu_read_lock() */
xeb@mail.ru's avatar
xeb@mail.ru committed
747
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
748
749
				  const struct in6_addr *laddr,
				  const struct in6_addr *raddr)
750
{
xeb@mail.ru's avatar
xeb@mail.ru committed
751
	struct __ip6_tnl_parm *p = &t->parms;
752
	int ret = 0;
Nicolas Dichtel's avatar
Nicolas Dichtel committed
753
	struct net *net = t->net;
754

755
756
757
	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
758
		struct net_device *ldev = NULL;
759
760

		if (p->link)
Eric Dumazet's avatar
Eric Dumazet committed
761
			ldev = dev_get_by_index_rcu(net, p->link);
762

763
		if ((ipv6_addr_is_multicast(laddr) ||
764
765
		     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
						    0, IFA_F_TENTATIVE))) &&
766
		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
767
768
		     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
						     0, IFA_F_TENTATIVE))))
769
770
771
772
			ret = 1;
	}
	return ret;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
773
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
Linus Torvalds's avatar
Linus Torvalds committed
774

775
776
777
778
779
780
781
/*
 * __ip6_tnl_rcv - common receive path of a decapsulated packet
 *
 * Validates the checksum and sequence number flags in @tpi against the
 * tunnel configuration, prepares @skb for reception on the tunnel device,
 * runs @dscp_ecn_decapsulate, updates the per-cpu rx statistics and hands
 * the packet to the tunnel's GRO cells. On any failure the packet (and
 * @tun_dst, if given) is released and the matching error counters of the
 * tunnel device are incremented.
 *
 * Always returns 0; @skb is consumed in every case.
 */
static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
			 const struct tnl_ptk_info *tpi,
			 struct metadata_dst *tun_dst,
			 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						const struct ipv6hdr *ipv6h,
						struct sk_buff *skb),
			 bool log_ecn_err)
{
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct net_device *dev = tunnel->dev;
	struct pcpu_sw_netstats *tstats;
	int err;

	/* checksum flag of packet and tunnel configuration must agree */
	if (!(tpi->flags & TUNNEL_CSUM) !=
	    !(tunnel->parms.i_flags & TUNNEL_CSUM)) {
		dev->stats.rx_crc_errors++;
		dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		/* sequence number missing or older than expected */
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			dev->stats.rx_fifo_errors++;
			dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb->protocol = tpi->proto;

	/* Warning: All skb pointers will be invalidated! */
	if (dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			dev->stats.rx_length_errors++;
			dev->stats.rx_errors++;
			goto drop;
		}

		/* reload after the pull above */
		ipv6h = ipv6_hdr(skb);
		skb->protocol = eth_type_trans(skb, dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = dev;
	}

	skb_reset_network_header(skb);
	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

	__skb_tunnel_rx(skb, dev, tunnel->net);

	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
	if (unlikely(err)) {
		if (log_ecn_err)
			net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
					     &ipv6h->saddr,
					     ipv6_get_dsfield(ipv6h));
		/* results above 1 cause the packet to be dropped */
		if (err > 1) {
			++dev->stats.rx_frame_errors;
			++dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}

/*
 * ip6_tnl_rcv - exported receive entry point
 *
 * Thin wrapper around __ip6_tnl_rcv() that always uses the IPv6-in-IPv6
 * DSCP/ECN decapsulation hook. Returns what __ip6_tnl_rcv() returns.
 */
int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
		const struct tnl_ptk_info *tpi,
		struct metadata_dst *tun_dst,
		bool log_ecn_err)
{
	int ret;

	ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst,
			    ip6ip6_dscp_ecn_decapsulate, log_ecn_err);
	return ret;
}
EXPORT_SYMBOL(ip6_tnl_rcv);

/*
 * Packet info handed to the receive path by ipxip6: no tunnel info is
 * required, only the ethertype of the inner protocol is set.
 */
static const struct tnl_ptk_info tpi_v6 = {
	.proto = htons(ETH_P_IPV6),
};

static const struct tnl_ptk_info tpi_v4 = {
	.proto = htons(ETH_P_IP),
};

/*
 * ipxip6_rcv - receive an IPv4-in-IPv6 or IPv6-in-IPv6 packet
 *
 * Looks up the tunnel for the outer addresses of @skb and, after the
 * protocol, xfrm policy and address checks, passes the packet on to
 * __ip6_tnl_rcv().
 *
 * Returns -1 if no tunnel matches (the packet is left untouched),
 * 0 if the packet was dropped here, otherwise the result of
 * __ip6_tnl_rcv().
 */
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						  const struct ipv6hdr *ipv6h,
						  struct sk_buff *skb))
{
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	struct ip6_tnl *t;
	u8 tproto;
	int ret;

	rcu_read_lock();

	/* incoming packet: its source is the tunnel's remote end-point */
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
	if (!t) {
		rcu_read_unlock();
		return -1;
	}

	/* a tunnel protocol of 0 accepts any inner protocol */
	tproto = READ_ONCE(t->parms.proto);
	if (tproto != ipproto && tproto != 0)
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto drop;

	ipv6h = ipv6_hdr(skb);
	if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
		goto drop;

	if (iptunnel_pull_header(skb, 0, tpi->proto, false))
		goto drop;

	if (t->parms.collect_md) {
		tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
		if (!tun_dst)
			goto drop;
	}

	ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
			    log_ecn_error);
	rcu_read_unlock();

	return ret;

drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}

928
929
static int ip4ip6_rcv(struct sk_buff *skb)
{
930
	return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
931
			  ip4ip6_dscp_ecn_decapsulate);
932
933
}

934
935
static int ip6ip6_rcv(struct sk_buff *skb)
{
936
937
	return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
			  ip6ip6_dscp_ecn_decapsulate);
938
939
}

940
941
942
943
/*
 * Tx options together with the storage for the 8-byte destination options
 * header they refer to; filled in by init_tel_txopt().
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;	/* ops.dst1opt points at dst_opt */
	__u8 dst_opt[8];		/* raw destination options header */
};
Linus Torvalds's avatar
Linus Torvalds committed
944

945
946
947
/*
 * Build a destination options header that carries a tunnel encapsulation
 * limit of @encap_limit and attach it to @opt->ops.
 */
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	__u8 *tlv = opt->dst_opt;

	/* bytes 0-1 (the struct ipv6_opt_hdr part) and byte 7 stay zero */
	memset(opt, 0, sizeof(*opt));

	/* encapsulation limit option: type, length, value */
	tlv[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	tlv[3] = 1;
	tlv[4] = encap_limit;

	/* PadN option with one byte of padding fills up the 8 bytes */
	tlv[5] = IPV6_TLV_PADN;
	tlv[6] = 1;

	opt->ops.dst1opt = (struct ipv6_opt_hdr *) tlv;
	opt->ops.opt_nflen = 8;
}

/**
960
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
Linus Torvalds's avatar
Linus Torvalds committed
961
 *   @t: the outgoing tunnel device
YOSHIFUJI Hideaki's avatar