xfrm_policy.c 80 KB
Newer Older
1
/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki
 * 		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

16
#include <linux/err.h>
Linus Torvalds's avatar
Linus Torvalds committed
17
18
19
20
21
22
23
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
24
#include <linux/netfilter.h>
Linus Torvalds's avatar
Linus Torvalds committed
25
#include <linux/module.h>
26
#include <linux/cache.h>
Paul Moore's avatar
Paul Moore committed
27
#include <linux/audit.h>
28
#include <net/dst.h>
29
#include <net/flow.h>
Linus Torvalds's avatar
Linus Torvalds committed
30
31
#include <net/xfrm.h>
#include <net/ip.h>
32
33
34
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
Linus Torvalds's avatar
Linus Torvalds committed
35

36
37
#include "xfrm_hash.h"

38
39
40
41
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

42
43
44
45
46
/* Bundles an original dst entry pointer with a flags byte.
 * NOTE(review): the users of this struct are outside this view —
 * presumably it is handed as context to the flow resolver; confirm
 * against the callers before relying on that.
 */
struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};

47
48
49
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
50

51
static struct kmem_cache *xfrm_dst_cache __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
52

53
static void xfrm_init_pmtu(struct dst_entry *dst);
54
static int stale_bundle(struct dst_entry *dst);
55
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
56
static void xfrm_policy_queue_process(unsigned long arg);
Linus Torvalds's avatar
Linus Torvalds committed
57

58
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
59
60
61
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

62
static inline bool
63
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
64
{
65
66
	const struct flowi4 *fl4 = &fl->u.ip4;

67
68
	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
69
70
71
72
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
73
74
}

75
static inline bool
76
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
77
{
78
79
80
81
82
83
84
85
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
86
87
}

88
89
bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
90
91
92
93
94
95
96
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
97
	return false;
98
99
}

Eric Dumazet's avatar
Eric Dumazet committed
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/* Look up the registered per-family policy operations.
 *
 * Returns NULL when @family is out of range or has no registered entry;
 * in that case no lock is held on return.  On success the RCU read lock
 * taken here is still held when the function returns.
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *ops;

	if (unlikely(family >= NPROTO))
		return NULL;

	rcu_read_lock();
	ops = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!ops)) {
		/* nothing registered: do not leave the read side locked */
		rcu_read_unlock();
		return NULL;
	}

	return ops;
}

/* Release side of xfrm_policy_get_afinfo(): drops the RCU read lock.
 * @afinfo itself is not touched by the body.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}

David Ahern's avatar
David Ahern committed
118
119
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
						  int tos, int oif,
120
121
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
122
123
124
125
126
127
128
129
130
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

David Ahern's avatar
David Ahern committed
131
	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
132
133
134
135
136
137

	xfrm_policy_put_afinfo(afinfo);

	return dst;
}

David Ahern's avatar
David Ahern committed
138
139
/* Route lookup for one state in a bundle.
 *
 * The lookup addresses default to the state's own saddr/daddr.  When the
 * state type carries XFRM_TYPE_LOCAL_COADDR or XFRM_TYPE_REMOTE_COADDR,
 * the care-of address replaces one side and the caller-supplied previous
 * address is used for the other.
 *
 * On success *prev_saddr / *prev_daddr are overwritten with the addresses
 * that were used (unless they already are those very objects).  On error
 * they are left untouched and the ERR_PTR is returned.
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *src = &x->props.saddr;
	xfrm_address_t *dest = &x->id.daddr;
	struct dst_entry *route;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		src = x->coaddr;
		dest = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		src = prev_saddr;
		dest = x->coaddr;
	}

	route = __xfrm_dst_lookup(net, tos, oif, src, dest, family);
	if (IS_ERR(route))
		return route;

	/* remember the addresses used for the next lookup in the chain */
	if (prev_saddr != src)
		memcpy(prev_saddr, src, sizeof(*prev_saddr));
	if (prev_daddr != dest)
		memcpy(prev_daddr, dest, sizeof(*prev_daddr));

	return route;
}

/* Convert a duration in seconds to jiffies, saturating at
 * MAX_SCHEDULE_TIMEOUT - 1 when the product would reach that limit.
 */
static inline unsigned long make_jiffies(long secs)
{
	if (secs < (MAX_SCHEDULE_TIMEOUT - 1) / HZ)
		return secs * HZ;

	return MAX_SCHEDULE_TIMEOUT - 1;
}

static void xfrm_policy_timer(unsigned long data)
{
180
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
181
	unsigned long now = get_seconds();
Linus Torvalds's avatar
Linus Torvalds committed
182
183
184
185
186
187
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

188
	if (unlikely(xp->walk.dead))
Linus Torvalds's avatar
Linus Torvalds committed
189
190
		goto out;

191
	dir = xfrm_policy_id2dir(xp->index);
Linus Torvalds's avatar
Linus Torvalds committed
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
231
		km_policy_expired(xp, dir, 0, 0);
Linus Torvalds's avatar
Linus Torvalds committed
232
233
234
235
236
237
238
239
240
241
242
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
243
	if (!xfrm_policy_delete(xp, dir))
244
		km_policy_expired(xp, dir, 1, 0);
Linus Torvalds's avatar
Linus Torvalds committed
245
246
247
	xfrm_pol_put(xp);
}

248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/* flow_cache_ops.get: take a reference on the policy embedding @flo.
 * Returns @flo with the reference held, or NULL (no reference taken)
 * when the policy is already marked dead.
 */
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	if (unlikely(policy->walk.dead))
		return NULL;

	xfrm_pol_hold(policy);
	return flo;
}

/* flow_cache_ops.check: 1 while the policy embedding @flo is alive,
 * 0 once it has been marked dead.
 */
static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	return policy->walk.dead ? 0 : 1;
}

/* flow_cache_ops.delete: drop one reference on the policy embedding @flo. */
static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
	struct xfrm_policy *policy = container_of(flo, struct xfrm_policy, flo);

	xfrm_pol_put(policy);
}

/* Flow cache callbacks for policy objects; installed into each policy's
 * flo.ops by xfrm_policy_alloc().
 */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};
Linus Torvalds's avatar
Linus Torvalds committed
277
278
279
280
281

/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */

282
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *pol = kzalloc(sizeof(*pol), gfp);

	if (!pol)
		return NULL;

	write_pnet(&pol->xp_net, net);

	/* list and hash linkage start out unlinked */
	INIT_LIST_HEAD(&pol->walk.all);
	INIT_HLIST_NODE(&pol->bydst);
	INIT_HLIST_NODE(&pol->byidx);

	rwlock_init(&pol->lock);
	/* the caller owns the initial reference */
	atomic_set(&pol->refcnt, 1);

	skb_queue_head_init(&pol->polq.hold_queue);

	/* both timers receive the policy itself as their argument */
	setup_timer(&pol->timer, xfrm_policy_timer,
		    (unsigned long)pol);
	setup_timer(&pol->polq.hold_timer, xfrm_policy_queue_process,
		    (unsigned long)pol);

	pol->flo.ops = &xfrm_policy_fc_ops;

	return pol;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must already have been released. */

308
void xfrm_policy_destroy(struct xfrm_policy *policy)
Linus Torvalds's avatar
Linus Torvalds committed
309
{
310
	BUG_ON(!policy->walk.dead);
Linus Torvalds's avatar
Linus Torvalds committed
311

312
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
Linus Torvalds's avatar
Linus Torvalds committed
313
314
		BUG();

315
	security_xfrm_policy_free(policy->security);
Linus Torvalds's avatar
Linus Torvalds committed
316
317
	kfree(policy);
}
318
EXPORT_SYMBOL(xfrm_policy_destroy);
Linus Torvalds's avatar
Linus Torvalds committed
319
320
321
322
323
324
325

/* Rule must be locked. Release descendant resources and announce the
 * entry dead. The rule must already have been unlinked from the lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
326
	policy->walk.dead = 1;
Linus Torvalds's avatar
Linus Torvalds committed
327

328
	atomic_inc(&policy->genid);
Linus Torvalds's avatar
Linus Torvalds committed
329

330
331
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
332
	skb_queue_purge(&policy->polq.hold_queue);
333

334
335
336
337
	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
Linus Torvalds's avatar
Linus Torvalds committed
338
339
}

340
341
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

342
/* Bucket number for policy @index in @net's by-index hash table. */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	unsigned int mask = net->xfrm.policy_idx_hmask;

	return __idx_hash(index, mask);
}

347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/* Fetch the hash prefix-length thresholds configured for (@family, @dir).
 *
 * *dbits / *sbits receive the destination / source values stored in
 * net->xfrm.policy_bydst[dir]; both are set to 0 for any family other
 * than AF_INET and AF_INET6.
 */
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	if (family == AF_INET) {
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
	} else if (family == AF_INET6) {
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
	} else {
		*dbits = 0;
		*sbits = 0;
	}
}

369
370
371
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
372
{
373
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
374
375
376
377
378
379
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);
380
381

	return (hash == hmask + 1 ?
382
383
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
384
385
}

386
387
388
389
static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
390
{
391
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
392
393
394
395
396
397
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
398

399
	return net->xfrm.policy_bydst[dir].table + hash;
400
401
}

402
403
static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
404
				   struct hlist_head *ndsttable,
405
406
				   unsigned int nhashmask,
				   int dir)
407
{
408
	struct hlist_node *tmp, *entry0 = NULL;
409
	struct xfrm_policy *pol;
410
	unsigned int h0 = 0;
411
412
	u8 dbits;
	u8 sbits;
413

414
redo:
415
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
416
417
		unsigned int h;

418
		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
419
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
420
				pol->family, nhashmask, dbits, sbits);
421
		if (!entry0) {
422
			hlist_del(&pol->bydst);
423
424
425
426
427
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
428
			hlist_del(&pol->bydst);
429
			hlist_add_behind(&pol->bydst, entry0);
430
		}
431
		entry0 = &pol->bydst;
432
433
434
435
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
436
437
438
439
440
441
442
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
443
	struct hlist_node *tmp;
444
445
	struct xfrm_policy *pol;

446
	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
447
448
449
450
451
452
453
454
455
456
457
458
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

/* Mask for a hash table with twice as many buckets as one using
 * @old_hmask (mask == number of buckets - 1).
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int nbuckets = (old_hmask + 1) << 1;

	return nbuckets - 1;
}

459
static void xfrm_bydst_resize(struct net *net, int dir)
460
{
461
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
462
463
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
464
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
465
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
466
467
468
469
470
	int i;

	if (!ndst)
		return;

471
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
472
473

	for (i = hmask; i >= 0; i--)
474
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
475

476
477
	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;
478

479
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
480

481
	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
482
483
}

484
static void xfrm_byidx_resize(struct net *net, int total)
485
{
486
	unsigned int hmask = net->xfrm.policy_idx_hmask;
487
488
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
489
	struct hlist_head *oidx = net->xfrm.policy_byidx;
490
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
491
492
493
494
495
	int i;

	if (!nidx)
		return;

496
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
497
498
499
500

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

501
502
	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;
503

504
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
505

506
	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
507
508
}

509
static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
510
{
511
512
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
513
514
515
516
517
518
519
520
521
522
523

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

524
static inline int xfrm_byidx_should_resize(struct net *net, int total)
525
{
526
	unsigned int hmask = net->xfrm.policy_idx_hmask;
527
528
529
530
531
532
533
534

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

535
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
Jamal Hadi Salim's avatar
Jamal Hadi Salim committed
536
{
537
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
538
539
540
541
542
543
544
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
Jamal Hadi Salim's avatar
Jamal Hadi Salim committed
545
	si->spdhmcnt = xfrm_policy_hashmax;
546
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
Jamal Hadi Salim's avatar
Jamal Hadi Salim committed
547
548
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
549

Jamal Hadi Salim's avatar
Jamal Hadi Salim committed
550
static DEFINE_MUTEX(hash_resize_mutex);
551
static void xfrm_hash_resize(struct work_struct *work)
552
{
553
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
554
555
556
557
558
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
Herbert Xu's avatar
Herbert Xu committed
559
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
560
561
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
562
	}
563
564
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);
565
566
567
568

	mutex_unlock(&hash_resize_mutex);
}

569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
/* Work handler for net->xfrm.policy_hthresh.work: re-hash all policies
 * after the selector prefix-length thresholds have changed.
 *
 * The new thresholds are sampled under the hthresh seqlock, then, with
 * the policy write lock held, every bydst bucket and inexact list of
 * the hashed directions is emptied, the per-direction thresholds are
 * updated and every policy on policy_all is re-inserted in priority
 * order.  Serialised against resizing by hash_resize_mutex.
 */
static void xfrm_hash_rebuild(struct work_struct *work)
{
	struct net *net = container_of(work, struct net,
				       xfrm.policy_hthresh.work);
	unsigned int hmask;
	struct xfrm_policy *pol;
	struct xfrm_policy *policy;
	struct hlist_head *chain;
	struct hlist_head *odst;
	struct hlist_node *newpos;
	int i;
	int dir;
	unsigned seq;
	u8 lbits4, rbits4, lbits6, rbits6;

	mutex_lock(&hash_resize_mutex);

	/* read selector prefixlen thresholds */
	do {
		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

		lbits4 = net->xfrm.policy_hthresh.lbits4;
		rbits4 = net->xfrm.policy_hthresh.rbits4;
		lbits6 = net->xfrm.policy_hthresh.lbits6;
		rbits6 = net->xfrm.policy_hthresh.rbits6;
	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	/* reset the bydst and inexact table in all directions */
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
		hmask = net->xfrm.policy_bydst[dir].hmask;
		odst = net->xfrm.policy_bydst[dir].table;
		for (i = hmask; i >= 0; i--)
			INIT_HLIST_HEAD(odst + i);
		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			/* dir out => dst = remote, src = local */
			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
		} else {
			/* dir in/fwd => dst = local, src = remote */
			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
		}
	}

	/* re-insert all policies by order of creation */
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		/* Entries flagged dead are not live policies and must not
		 * be hashed (NOTE(review): presumably this also covers
		 * walker placeholders linked on policy_all — confirm
		 * against xfrm_policy_walk).  Test this before touching
		 * any other field.
		 */
		if (policy->walk.dead)
			continue;

		/* Only directions below XFRM_POLICY_MAX have bydst and
		 * inexact tables (see the reset loop above); anything else,
		 * i.e. socket policies, would index past those arrays.
		 */
		dir = xfrm_policy_id2dir(policy->index);
		if (dir >= XFRM_POLICY_MAX)
			continue;

		newpos = NULL;
		chain = policy_hash_bysel(net, &policy->selector,
					  policy->family, dir);
		/* find the last entry whose priority value does not exceed
		 * ours, so equal priorities stay in creation order
		 */
		hlist_for_each_entry(pol, chain, bydst) {
			if (policy->priority >= pol->priority)
				newpos = &pol->bydst;
			else
				break;
		}
		if (newpos)
			hlist_add_behind(&policy->bydst, newpos);
		else
			hlist_add_head(&policy->bydst, chain);
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	mutex_unlock(&hash_resize_mutex);
}

/* Request a rebuild of @net's policy hash tables by scheduling
 * net->xfrm.policy_hthresh.work.  The rebuild itself runs later from
 * the work item, not in the caller's context.
 */
void xfrm_policy_hash_rebuild(struct net *net)
{
	schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);

Linus Torvalds's avatar
Linus Torvalds committed
649
650
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute unpredictability of ordering of rules. This will not pass. */
651
/* Return a policy index that is not yet present in @net's by-index
 * table.
 *
 * A non-zero @index is tried first.  After that, candidates come from a
 * file-wide generator that advances in steps of 8 and has @dir OR-ed
 * into it.  A candidate of 0 is replaced by 8.
 */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *bucket;
		struct xfrm_policy *cur;
		u32 candidate;
		int taken = 0;

		if (index) {
			/* the caller's preference is tried exactly once */
			candidate = index;
			index = 0;
		} else {
			candidate = (idx_generator | dir);
			idx_generator += 8;
		}

		if (candidate == 0)
			candidate = 8;

		bucket = net->xfrm.policy_byidx + idx_hash(net, candidate);
		hlist_for_each_entry(cur, bucket, byidx) {
			if (cur->index == candidate) {
				taken = 1;
				break;
			}
		}

		if (!taken)
			return candidate;
	}
}

684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

699
700
701
702
703
704
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

705
706
707
	if (skb_queue_empty(&pq->hold_queue))
		return;

708
709
710
711
	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
712
713
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
714
715
716
717
718
719
720
	spin_unlock_bh(&pq->hold_queue.lock);

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
721
722
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
723
724
725
	spin_unlock_bh(&pq->hold_queue.lock);
}

726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
/* Decide whether the marks of @policy and @pol collide.
 *
 * True when both mark value and mask are identical, or when @policy's
 * masked mark falls within @pol's mark and both share one priority.
 */
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
				   struct xfrm_policy *pol)
{
	u32 effective = policy->mark.v & policy->mark.m;

	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
		return true;

	return (effective & pol->mark.m) == pol->mark.v &&
	       policy->priority == pol->priority;
}

Linus Torvalds's avatar
Linus Torvalds committed
741
742
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
743
	struct net *net = xp_net(policy);
744
745
746
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
747
	struct hlist_node *newpos;
Linus Torvalds's avatar
Linus Torvalds committed
748

749
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
750
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
751
752
	delpol = NULL;
	newpos = NULL;
753
	hlist_for_each_entry(pol, chain, bydst) {
Herbert Xu's avatar
Herbert Xu committed
754
		if (pol->type == policy->type &&
755
		    !selector_cmp(&pol->selector, &policy->selector) &&
756
		    xfrm_policy_mark_match(policy, pol) &&
Herbert Xu's avatar
Herbert Xu committed
757
758
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
Linus Torvalds's avatar
Linus Torvalds committed
759
			if (excl) {
760
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
Linus Torvalds's avatar
Linus Torvalds committed
761
762
763
764
765
766
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
Herbert Xu's avatar
Herbert Xu committed
767
			newpos = &pol->bydst;
Linus Torvalds's avatar
Linus Torvalds committed
768
769
770
771
772
773
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
774
		hlist_add_behind(&policy->bydst, newpos);
775
776
	else
		hlist_add_head(&policy->bydst, chain);
777
	__xfrm_policy_link(policy, dir);
778
	atomic_inc(&net->xfrm.flow_cache_genid);
fan.du's avatar
fan.du committed
779
780
781
782
783
784
785

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

786
787
	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
788
		__xfrm_policy_unlink(delpol, dir);
789
	}
790
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
791
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
792
	policy->curlft.add_time = get_seconds();
Linus Torvalds's avatar
Linus Torvalds committed
793
794
795
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
796
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
Linus Torvalds's avatar
Linus Torvalds committed
797

798
	if (delpol)
Linus Torvalds's avatar
Linus Torvalds committed
799
		xfrm_policy_kill(delpol);
800
801
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
802

Linus Torvalds's avatar
Linus Torvalds committed
803
804
805
806
	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

807
808
/* Find, and optionally delete, the policy of @type in direction @dir
 * whose selector equals @sel, whose mark accepts @mark and whose
 * security context matches @ctx.
 *
 * The returned policy carries a reference taken here; NULL means no
 * match.  With @delete set, the security hook is consulted first: if it
 * refuses, *err holds its error and the (still linked) policy is
 * returned; otherwise the policy is unlinked and killed before it is
 * returned.  *err is 0 in every other case.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *found = NULL;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);

	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type != type)
			continue;
		if ((mark & pol->mark.m) != pol->mark.v)
			continue;
		if (selector_cmp(sel, &pol->selector))
			continue;
		if (!xfrm_sec_ctx_match(ctx, pol->security))
			continue;

		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		found = pol;
		break;
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (found && delete)
		xfrm_policy_kill(found);

	return found;
}
844
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
Linus Torvalds's avatar
Linus Torvalds committed
845

846
847
/* Find, and optionally delete, the policy with index @id of @type whose
 * mark accepts @mark.
 *
 * If the direction encoded in @id differs from @dir, NULL is returned
 * with *err = -ENOENT.  Otherwise the returned policy carries a
 * reference taken here; NULL means no match.  With @delete set, the
 * security hook is consulted first: if it refuses, *err holds its error
 * and the (still linked) policy is returned; otherwise the policy is
 * unlinked and killed before it is returned.
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *found = NULL;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);

	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type != type || pol->index != id)
			continue;
		if ((mark & pol->mark.m) != pol->mark.v)
			continue;

		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		found = pol;
		break;
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (found && delete)
		xfrm_policy_kill(found);

	return found;
}
EXPORT_SYMBOL(xfrm_policy_byid);

885
886
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
887
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
Linus Torvalds's avatar
Linus Torvalds committed
888
{
889
890
891
892
893
894
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

895
		hlist_for_each_entry(pol,
896
				     &net->xfrm.policy_inexact[dir], bydst) {
897
898
			if (pol->type != type)
				continue;
899
			err = security_xfrm_policy_delete(pol->security);
900
			if (err) {
901
				xfrm_audit_policy_delete(pol, 0, task_valid);
902
903
				return err;
			}
904
		}
905
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
906
			hlist_for_each_entry(pol,
907
					     net->xfrm.policy_bydst[dir].table + i,
908
909
910
					     bydst) {
				if (pol->type != type)
					continue;
911
912
				err = security_xfrm_policy_delete(
								pol->security);
913
				if (err) {
Joy Latten's avatar
Joy Latten committed
914
					xfrm_audit_policy_delete(pol, 0,
915
								 task_valid);
916
917
918
919
920
921
922
923
924
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
925
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
926
927
928
929
930
{
	return 0;
}
#endif

931
/* Remove every policy of @type from the hashed directions of @net.
 *
 * The security pre-check runs first; if it refuses, its error is
 * returned and nothing is removed.  Each victim is unlinked under the
 * policy write lock; the lock is then dropped while the deletion is
 * audited and the policy is killed, and the scan of that chain starts
 * over once the lock has been retaken.
 *
 * Returns 0 if at least one policy was removed, -ESRCH if none matched,
 * or the pre-check's error.
 */
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir;
	int err = 0;
	int removed = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	rescan_inexact:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;

			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			removed++;

			xfrm_audit_policy_delete(pol, 1, task_valid);

			xfrm_policy_kill(pol);

			/* the chain may have changed while unlocked */
			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			goto rescan_inexact;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	rescan_bucket:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;

				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				removed++;

				xfrm_audit_policy_delete(pol, 1, task_valid);
				xfrm_policy_kill(pol);

				/* the chain may have changed while unlocked */
				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				goto rescan_bucket;
			}
		}

	}
	if (!removed)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

990
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
991
		     int (*func)(struct xfrm_policy *, int, int, void*),
Linus Torvalds's avatar
Linus Torvalds committed
992
993
		     void *data)
{
994
995
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
996
997
998
999
1000
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;
For faster browsing, not all history is shown. View entire blame