ip6mr.c 57 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
35
#include <linux/slab.h>
36
#include <linux/compat.h>
37
38
39
40
41
42
43
44
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
45
#include <net/fib_rules.h>
46
47
48
49

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
50
#include <linux/pim.h>
51
52
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
53
#include <linux/export.h>
Dave Jones's avatar
Dave Jones committed
54
#include <net/ip6_checksum.h>
55
#include <linux/netconf.h>
56

57
/* Per-namespace IPv6 multicast routing table.  With
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES there is one instance per fib rule
 * table, chained on net->ipv6.mr6_tables; otherwise a single instance.
 */
struct mr6_table {
	struct list_head	list;		/* link on net->ipv6.mr6_tables */
	possible_net_t		net;		/* owning network namespace */
	u32			id;		/* table id (e.g. RT6_TABLE_DFLT) */
	struct sock		*mroute6_sk;	/* mroute control socket, if any */
	struct timer_list	ipmr_expire_timer; /* fires ipmr_expire_process() */
	struct list_head	mfc6_unres_queue;  /* unresolved cache entries */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved MFC hash buckets */
	struct mif_device	vif6_table[MAXMIFS]; /* multicast virtual interfaces */
	int			maxvif;		/* highest used vif index + 1 */
	atomic_t		cache_resolve_queue_len; /* length of mfc6_unres_queue */
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num; /* vif index of the pim6reg device, -1 if none */
#endif
};

75
76
77
78
79
80
81
82
/* Policy-routing rule for multicast table selection; no private fields
 * beyond the generic fib_rule.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* Output of a fib rules lookup: the multicast table the rule resolved to. */
struct ip6mr_result {
	struct mr6_table	*mrt;
};

83
84
85
86
87
88
89
90
91
92
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

93
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94
95
96
97
98
99
100
101
102
103
104
105
106
107

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

108
109
110
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

111
112
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
113
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114
			      mifi_t mifi, int assert);
115
116
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
117
118
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
119
120
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
121
static void mroute_clean_tables(struct mr6_table *mrt);
122
123
124
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
125
#define ip6mr_for_each_table(mrt, net) \
126
127
128
129
130
131
132
133
134
135
136
137
138
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Find the multicast routing table with the given id in @net, or NULL.
 * Walks the RCU-protected per-namespace table list.
 */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

139
/* Resolve the flow @flp6 to a multicast routing table via the namespace's
 * ip6mr fib rules.  On success *mrt is set and 0 is returned; otherwise
 * the negative error from the rule walk is returned.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		/* NOREF: result carries no reference that must be dropped */
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
176
	if (!mrt)
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

/* fib_rules .match callback: every ip6mr rule matches every flow. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

/* Netlink attribute policy: only the generic fib_rules attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* fib_rules .configure callback: no private attributes to parse. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

/* fib_rules .compare callback: rules carry no private state, so any two
 * are considered equal (1 == match).
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

/* fib_rules .fill callback: dump a rule to netlink; no selectors used. */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

212
/* Template for the per-namespace IP6MR fib_rules ops; copied by
 * fib_rules_register() in ip6mr_rules_init().
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

/* Per-namespace init (multiple-tables build): register the fib rules ops,
 * create the default table and a catch-all rule pointing at it.
 * Returns 0 or a negative errno; on failure everything created so far is
 * torn down in reverse order.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	/* Lowest-priority (0x7fff) rule steering all lookups to the
	 * default table.
	 */
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

/* Per-namespace teardown (multiple-tables build): free every multicast
 * table and unregister the fib rules ops, all under RTNL.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
/* Single-table build: exactly one table per namespace (net->ipv6.mrt6),
 * so "iteration" visits at most that one table.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

/* The table id is ignored; there is only one table. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

/* No rules to consult: every flow resolves to the single table. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

/* Per-namespace init: create the one-and-only default table. */
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

/* Per-namespace teardown: free the single table under RTNL. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

/* Look up the table @id in @net, creating and initializing it if it does
 * not exist yet.  Returns the table or NULL on allocation failure.
 */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	/* Timer that ages out entries stuck on the unresolved queue. */
	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;	/* no pim6reg device yet */
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
333

334
335
/* Destroy a multicast routing table: stop the expiry timer first so it
 * cannot run against freed state, flush vifs/cache entries, then free.
 */
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}
340
341
342
343

#ifdef CONFIG_PROC_FS

/* Iterator state for /proc/net/ip6_mr_cache. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;	/* bucket (or unres queue) being walked */
	int ct;				/* current hash bucket index */
};


/* Position the MFC iterator at entry @pos.  Resolved buckets are walked
 * under mrt_lock, then the unresolved queue under mfc_unres_lock.
 * NOTE: returns with the relevant lock still held when an entry is found;
 * ipmr_mfc_seq_stop() drops it based on it->cache.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

/* Iterator state for /proc/net/ip6_mr_vif. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;				/* current vif index */
};

/* Return the @pos'th existing vif, skipping empty slots.  Called with
 * mrt_lock held (taken in ip6mr_vif_seq_start()).
 */
static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

/* seq_file .start for the vif table: pin the default table, take mrt_lock
 * (released in ip6mr_vif_seq_stop()) and position the iterator.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	/* *pos == 0 means "emit the header line" first. */
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

/* seq_file .next for the vif table: advance to the next occupied vif slot,
 * or NULL when the table is exhausted.  mrt_lock is held throughout.
 */
static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

/* seq_file .stop for the vif table: drop the lock taken in .start. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

/* seq_file .show for the vif table: one header line, then one line of
 * counters per vif ("none" for a vif whose device is gone).
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,	/* vif index by pointer arithmetic */
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

466
/* seq_file plumbing for /proc/net/ip6_mr_vif. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

/* open(): allocate net-aware iterator state for the vif seq_file. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

/* seq_file .start for the MFC cache: pin the default table and position
 * the iterator.  Locking is done inside ipmr_mfc_seq_idx(), which leaves
 * the matching lock held for .show/.next.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

/* seq_file .next for the MFC cache.  Walks the resolved hash buckets
 * (under mrt_lock) and then switches to the unresolved queue (swapping
 * mrt_lock for mfc_unres_lock).  ipmr_mfc_seq_stop() releases whichever
 * lock is held according to it->cache.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* More entries in the current list? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	/* End of the unresolved queue: iteration is complete. */
	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	/* Advance to the next non-empty hash bucket. */
	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

/* seq_file .stop for the MFC cache: release whichever lock the iterator
 * is currently holding, identified by which list it->cache points into.
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

/* seq_file .show for the MFC cache: header line, then one line per entry.
 * Resolved entries print real counters and the per-vif TTL map;
 * unresolved ones print zeros (their union holds queue state instead).
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* Emit "vif:ttl" for each forwarding interface. */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

598
/* seq_file plumbing for /proc/net/ip6_mr_cache. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

/* open(): allocate net-aware iterator state for the MFC seq_file. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

620
621
622
623
624
625
626
#ifdef CONFIG_IPV6_PIMSM_V2

/* Receive handler for IPPROTO_PIM: validate a PIM REGISTER message,
 * strip the outer headers and re-inject the encapsulated IPv6 multicast
 * packet through the pim6reg device.  Always consumes the skb and
 * returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Must be a PIMv2 REGISTER (not a null-register) with a valid
	 * checksum over either the header alone or the whole packet.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* Grab a reference to the register device under mrt_lock so it
	 * cannot be freed while we re-inject through it.
	 */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	/* Decapsulate: make the inner IPv6 header the network header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

687
/* inet6 protocol hook delivering IPPROTO_PIM packets to pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

693
694
/* Transmit handler of the pim6reg device: instead of sending the packet
 * anywhere, report it whole to the user-space PIM daemon via
 * MRT6MSG_WHOLEPKT, then free it.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

720
721
722
723
724
/* pim6reg has no underlying link device. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

/* alloc_netdev() setup callback for the pim6reg pseudo-device. */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* Leave room for an outer IPv6 header plus the 8-byte PIM
	 * register header.
	 */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;	/* cannot move between namespaces */
}

740
/* Create, register and bring up the pim6reg device for table @mrt
 * ("pim6reg" for the default table, "pim6reg<id>" otherwise).
 * Called under RTNL.  Returns the held device or NULL on failure.
 */
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

777
778
779
780
/*
 *	Delete a VIF entry
 */

781
/*
 *	Delete a VIF entry
 */

/* Remove vif @vifi from @mrt: detach its device under mrt_lock, shrink
 * maxvif if the top slot was freed, drop allmulti, update the netconf
 * mc_forwarding counter, and (for register vifs) queue the device on
 * @head for unregistration by the caller.
 * Returns 0 or -EADDRNOTAVAIL if the vif does not exist.
 */
static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* If the highest slot was freed, find the new highest in-use vif. */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

834
835
836
837
838
/* Return an MFC cache entry to the slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

839
840
841
842
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

843
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		/* version == 0 marks a queued netlink request, not a real
		 * IPv6 packet: answer it with -ETIMEDOUT.
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


866
/* Timer process for all the unresolved queue. */
867

868
/* Timer process for all the unresolved queue. */

/* Destroy every unresolved entry whose timeout has passed and re-arm the
 * expiry timer for the soonest remaining deadline (at most 10s away).
 * Called with mfc_unres_lock held.
 */
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

892
/* Expiry timer callback.  Runs in timer context, so it must not sleep on
 * mfc_unres_lock: if the lock is contended, retry one jiffy later.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

909
static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
910
				    unsigned char *ttls)
911
912
913
{
	int vifi;

914
	cache->mfc_un.res.minvif = MAXMIFS;
915
	cache->mfc_un.res.maxvif = 0;
916
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
917

918
919
	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
920
		    ttls[vifi] && ttls[vifi] < 255) {
921
922
923
924
925
926
927
928
929
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

930
931
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
932
933
{
	int vifi = vifc->mif6c_mifi;
934
	struct mif_device *v = &mrt->vif6_table[vifi];
935
	struct net_device *dev;
936
	struct inet6_dev *in6_dev;
937
	int err;
938
939

	/* Is vif busy ? */
940
	if (MIF_EXISTS(mrt, vifi))
941
942
943
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
944
945
946
947
948
949
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
950
		if (mrt->mroute_reg_vif_num >= 0)
951
			return -EADDRINUSE;
952
		dev = ip6mr_reg_vif(net, mrt);
953
954
		if (!dev)
			return -ENOBUFS;
955
956
957
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
Wang Chen's avatar
Wang Chen committed
958
			dev_put(dev);
959
960
			return err;
		}
961
962
		break;
#endif
963
	case 0:
964
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
965
966
		if (!dev)
			return -EADDRNOTAVAIL;
967
		err = dev_set_allmulti(dev, 1);
Wang Chen's avatar
Wang Chen committed
968
969
		if (err) {
			dev_put(dev);
970
			return err;
Wang Chen's avatar
Wang Chen committed
971
		}
972
973
974
975
976
		break;
	default:
		return -EINVAL;
	}

977
	in6_dev = __in6_dev_get(dev);
978
	if (in6_dev) {
979
		in6_dev->cnf.mc_forwarding++;
980
981
982
983
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}
984

985
986
987
988
989
990
991
992
993
994
995
996
997
998
	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
999
		v->link = dev_get_iflink(dev);
1000

For faster browsing, not all history is shown. View entire blame