filter.c 16.2 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
28
#include <linux/gfp.h>
Linus Torvalds's avatar
Linus Torvalds committed
29
30
#include <net/ip.h>
#include <net/protocol.h>
31
#include <net/netlink.h>
Linus Torvalds's avatar
Linus Torvalds committed
32
33
34
35
36
37
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
38
#include <asm/unaligned.h>
Linus Torvalds's avatar
Linus Torvalds committed
39
#include <linux/filter.h>
Eric Dumazet's avatar
Eric Dumazet committed
40
#include <linux/reciprocal_div.h>
Linus Torvalds's avatar
Linus Torvalds committed
41

42
/*
 * Internal opcode numbering used by sk_run_filter().  sk_chk_filter()
 * rewrites each user-supplied instruction code into one of these values.
 * Zero is deliberately left unused: sk_chk_filter() treats a zero entry
 * in its translation table as "invalid instruction".
 *
 * The order of the enumerators defines their values; do not reorder.
 */
enum {
	/* return */
	BPF_S_RET_K = 1,
	BPF_S_RET_A,

	/* arithmetic / logic on the accumulator */
	BPF_S_ALU_ADD_K,
	BPF_S_ALU_ADD_X,
	BPF_S_ALU_SUB_K,
	BPF_S_ALU_SUB_X,
	BPF_S_ALU_MUL_K,
	BPF_S_ALU_MUL_X,
	BPF_S_ALU_DIV_X,
	BPF_S_ALU_AND_K,
	BPF_S_ALU_AND_X,
	BPF_S_ALU_OR_K,
	BPF_S_ALU_OR_X,
	BPF_S_ALU_LSH_K,
	BPF_S_ALU_LSH_X,
	BPF_S_ALU_RSH_K,
	BPF_S_ALU_RSH_X,
	BPF_S_ALU_NEG,

	/* loads into the accumulator */
	BPF_S_LD_W_ABS,
	BPF_S_LD_H_ABS,
	BPF_S_LD_B_ABS,
	BPF_S_LD_W_LEN,
	BPF_S_LD_W_IND,
	BPF_S_LD_H_IND,
	BPF_S_LD_B_IND,
	BPF_S_LD_IMM,

	/* loads into the index register */
	BPF_S_LDX_W_LEN,
	BPF_S_LDX_B_MSH,
	BPF_S_LDX_IMM,

	/* register transfers */
	BPF_S_MISC_TAX,
	BPF_S_MISC_TXA,

	/* division by constant (K holds a reciprocal at run time) */
	BPF_S_ALU_DIV_K,

	/* scratch memory access */
	BPF_S_LD_MEM,
	BPF_S_LDX_MEM,
	BPF_S_ST,
	BPF_S_STX,

	/* jumps */
	BPF_S_JMP_JA,
	BPF_S_JMP_JEQ_K,
	BPF_S_JMP_JEQ_X,
	BPF_S_JMP_JGE_K,
	BPF_S_JMP_JGE_X,
	BPF_S_JMP_JGT_K,
	BPF_S_JMP_JGT_X,
	BPF_S_JMP_JSET_K,
	BPF_S_JMP_JSET_X,
};

Linus Torvalds's avatar
Linus Torvalds committed
90
/* No hurry in this branch */
91
static void *__load_pointer(struct sk_buff *skb, int k)
Linus Torvalds's avatar
Linus Torvalds committed
92
93
94
95
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
96
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
Linus Torvalds's avatar
Linus Torvalds committed
97
	else if (k >= SKF_LL_OFF)
98
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
Linus Torvalds's avatar
Linus Torvalds committed
99

100
	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
Linus Torvalds's avatar
Linus Torvalds committed
101
102
103
104
		return ptr;
	return NULL;
}

105
static inline void *load_pointer(struct sk_buff *skb, int k,
106
				 unsigned int size, void *buffer)
107
108
109
110
111
112
113
114
115
116
{
	if (k >= 0)
		return skb_header_pointer(skb, k, size, buffer);
	else {
		if (k >= SKF_AD_OFF)
			return NULL;
		return __load_pointer(skb, k);
	}
}

Stephen Hemminger's avatar
Stephen Hemminger committed
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to correct size returned by
 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock_bh();
139
	filter = rcu_dereference_bh(sk->sk_filter);
Stephen Hemminger's avatar
Stephen Hemminger committed
140
	if (filter) {
Eric Dumazet's avatar
Eric Dumazet committed
141
		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
142

Stephen Hemminger's avatar
Stephen Hemminger committed
143
144
145
146
147
148
149
150
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock_bh();

	return err;
}
EXPORT_SYMBOL(sk_filter);

Linus Torvalds's avatar
Linus Torvalds committed
151
/**
152
 *	sk_run_filter - run a filter on a socket
Linus Torvalds's avatar
Linus Torvalds committed
153
154
155
156
 *	@skb: buffer to run the filter on
 *	@filter: filter to apply
 *
 * Decode and apply filter instructions to the skb->data.
Eric Dumazet's avatar
Eric Dumazet committed
157
158
159
160
161
 * Return length to keep, 0 for none. @skb is the data we are
 * filtering, @filter is the array of filter instructions.
 * Because all jumps are guaranteed to be before last instruction,
 * and last instruction guaranteed to be a RET, we dont need to check
 * flen. (We used to pass to this function the length of filter)
Linus Torvalds's avatar
Linus Torvalds committed
162
 */
Eric Dumazet's avatar
Eric Dumazet committed
163
unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
Linus Torvalds's avatar
Linus Torvalds committed
164
{
165
	void *ptr;
166
167
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
Linus Torvalds's avatar
Linus Torvalds committed
168
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
169
	u32 tmp;
Linus Torvalds's avatar
Linus Torvalds committed
170
171
172
173
174
	int k;

	/*
	 * Process array of filter instructions.
	 */
Eric Dumazet's avatar
Eric Dumazet committed
175
176
177
178
179
180
	for (;; fentry++) {
#if defined(CONFIG_X86_32)
#define	K (fentry->k)
#else
		const u32 K = fentry->k;
#endif
181

Linus Torvalds's avatar
Linus Torvalds committed
182
		switch (fentry->code) {
183
		case BPF_S_ALU_ADD_X:
Linus Torvalds's avatar
Linus Torvalds committed
184
185
			A += X;
			continue;
186
		case BPF_S_ALU_ADD_K:
Eric Dumazet's avatar
Eric Dumazet committed
187
			A += K;
Linus Torvalds's avatar
Linus Torvalds committed
188
			continue;
189
		case BPF_S_ALU_SUB_X:
Linus Torvalds's avatar
Linus Torvalds committed
190
191
			A -= X;
			continue;
192
		case BPF_S_ALU_SUB_K:
Eric Dumazet's avatar
Eric Dumazet committed
193
			A -= K;
Linus Torvalds's avatar
Linus Torvalds committed
194
			continue;
195
		case BPF_S_ALU_MUL_X:
Linus Torvalds's avatar
Linus Torvalds committed
196
197
			A *= X;
			continue;
198
		case BPF_S_ALU_MUL_K:
Eric Dumazet's avatar
Eric Dumazet committed
199
			A *= K;
Linus Torvalds's avatar
Linus Torvalds committed
200
			continue;
201
		case BPF_S_ALU_DIV_X:
Linus Torvalds's avatar
Linus Torvalds committed
202
203
204
205
			if (X == 0)
				return 0;
			A /= X;
			continue;
206
		case BPF_S_ALU_DIV_K:
Eric Dumazet's avatar
Eric Dumazet committed
207
			A = reciprocal_divide(A, K);
Linus Torvalds's avatar
Linus Torvalds committed
208
			continue;
209
		case BPF_S_ALU_AND_X:
Linus Torvalds's avatar
Linus Torvalds committed
210
211
			A &= X;
			continue;
212
		case BPF_S_ALU_AND_K:
Eric Dumazet's avatar
Eric Dumazet committed
213
			A &= K;
Linus Torvalds's avatar
Linus Torvalds committed
214
			continue;
215
		case BPF_S_ALU_OR_X:
Linus Torvalds's avatar
Linus Torvalds committed
216
217
			A |= X;
			continue;
218
		case BPF_S_ALU_OR_K:
Eric Dumazet's avatar
Eric Dumazet committed
219
			A |= K;
Linus Torvalds's avatar
Linus Torvalds committed
220
			continue;
221
		case BPF_S_ALU_LSH_X:
Linus Torvalds's avatar
Linus Torvalds committed
222
223
			A <<= X;
			continue;
224
		case BPF_S_ALU_LSH_K:
Eric Dumazet's avatar
Eric Dumazet committed
225
			A <<= K;
Linus Torvalds's avatar
Linus Torvalds committed
226
			continue;
227
		case BPF_S_ALU_RSH_X:
Linus Torvalds's avatar
Linus Torvalds committed
228
229
			A >>= X;
			continue;
230
		case BPF_S_ALU_RSH_K:
Eric Dumazet's avatar
Eric Dumazet committed
231
			A >>= K;
Linus Torvalds's avatar
Linus Torvalds committed
232
			continue;
233
		case BPF_S_ALU_NEG:
Linus Torvalds's avatar
Linus Torvalds committed
234
235
			A = -A;
			continue;
236
		case BPF_S_JMP_JA:
Eric Dumazet's avatar
Eric Dumazet committed
237
			fentry += K;
Linus Torvalds's avatar
Linus Torvalds committed
238
			continue;
239
		case BPF_S_JMP_JGT_K:
Eric Dumazet's avatar
Eric Dumazet committed
240
			fentry += (A > K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
241
			continue;
242
		case BPF_S_JMP_JGE_K:
Eric Dumazet's avatar
Eric Dumazet committed
243
			fentry += (A >= K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
244
			continue;
245
		case BPF_S_JMP_JEQ_K:
Eric Dumazet's avatar
Eric Dumazet committed
246
			fentry += (A == K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
247
			continue;
248
		case BPF_S_JMP_JSET_K:
Eric Dumazet's avatar
Eric Dumazet committed
249
			fentry += (A & K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
250
			continue;
251
		case BPF_S_JMP_JGT_X:
Eric Dumazet's avatar
Eric Dumazet committed
252
			fentry += (A > X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
253
			continue;
254
		case BPF_S_JMP_JGE_X:
Eric Dumazet's avatar
Eric Dumazet committed
255
			fentry += (A >= X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
256
			continue;
257
		case BPF_S_JMP_JEQ_X:
Eric Dumazet's avatar
Eric Dumazet committed
258
			fentry += (A == X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
259
			continue;
260
		case BPF_S_JMP_JSET_X:
Eric Dumazet's avatar
Eric Dumazet committed
261
			fentry += (A & X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
262
			continue;
263
		case BPF_S_LD_W_ABS:
Eric Dumazet's avatar
Eric Dumazet committed
264
			k = K;
265
load_w:
266
267
			ptr = load_pointer(skb, k, 4, &tmp);
			if (ptr != NULL) {
268
				A = get_unaligned_be32(ptr);
269
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
270
			}
271
			break;
272
		case BPF_S_LD_H_ABS:
Eric Dumazet's avatar
Eric Dumazet committed
273
			k = K;
274
load_h:
275
276
			ptr = load_pointer(skb, k, 2, &tmp);
			if (ptr != NULL) {
277
				A = get_unaligned_be16(ptr);
278
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
279
			}
280
			break;
281
		case BPF_S_LD_B_ABS:
Eric Dumazet's avatar
Eric Dumazet committed
282
			k = K;
Linus Torvalds's avatar
Linus Torvalds committed
283
load_b:
284
285
286
287
			ptr = load_pointer(skb, k, 1, &tmp);
			if (ptr != NULL) {
				A = *(u8 *)ptr;
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
288
			}
289
			break;
290
		case BPF_S_LD_W_LEN:
291
			A = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
292
			continue;
293
		case BPF_S_LDX_W_LEN:
294
			X = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
295
			continue;
296
		case BPF_S_LD_W_IND:
Eric Dumazet's avatar
Eric Dumazet committed
297
			k = X + K;
Linus Torvalds's avatar
Linus Torvalds committed
298
			goto load_w;
299
		case BPF_S_LD_H_IND:
Eric Dumazet's avatar
Eric Dumazet committed
300
			k = X + K;
Linus Torvalds's avatar
Linus Torvalds committed
301
			goto load_h;
302
		case BPF_S_LD_B_IND:
Eric Dumazet's avatar
Eric Dumazet committed
303
			k = X + K;
Linus Torvalds's avatar
Linus Torvalds committed
304
			goto load_b;
305
		case BPF_S_LDX_B_MSH:
Eric Dumazet's avatar
Eric Dumazet committed
306
			ptr = load_pointer(skb, K, 1, &tmp);
307
308
309
310
311
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
312
		case BPF_S_LD_IMM:
Eric Dumazet's avatar
Eric Dumazet committed
313
			A = K;
Linus Torvalds's avatar
Linus Torvalds committed
314
			continue;
315
		case BPF_S_LDX_IMM:
Eric Dumazet's avatar
Eric Dumazet committed
316
			X = K;
Linus Torvalds's avatar
Linus Torvalds committed
317
			continue;
318
		case BPF_S_LD_MEM:
319
			A = mem[K];
Linus Torvalds's avatar
Linus Torvalds committed
320
			continue;
321
		case BPF_S_LDX_MEM:
322
			X = mem[K];
Linus Torvalds's avatar
Linus Torvalds committed
323
			continue;
324
		case BPF_S_MISC_TAX:
Linus Torvalds's avatar
Linus Torvalds committed
325
326
			X = A;
			continue;
327
		case BPF_S_MISC_TXA:
Linus Torvalds's avatar
Linus Torvalds committed
328
329
			A = X;
			continue;
330
		case BPF_S_RET_K:
Eric Dumazet's avatar
Eric Dumazet committed
331
			return K;
332
		case BPF_S_RET_A:
333
			return A;
334
		case BPF_S_ST:
Eric Dumazet's avatar
Eric Dumazet committed
335
			mem[K] = A;
Linus Torvalds's avatar
Linus Torvalds committed
336
			continue;
337
		case BPF_S_STX:
Eric Dumazet's avatar
Eric Dumazet committed
338
			mem[K] = X;
Linus Torvalds's avatar
Linus Torvalds committed
339
340
			continue;
		default:
341
			WARN_ON(1);
Linus Torvalds's avatar
Linus Torvalds committed
342
343
344
345
346
347
348
349
350
			return 0;
		}

		/*
		 * Handle ancillary data, which are impossible
		 * (or very difficult) to get parsing packet contents.
		 */
		switch (k-SKF_AD_OFF) {
		case SKF_AD_PROTOCOL:
Al Viro's avatar
Al Viro committed
351
			A = ntohs(skb->protocol);
Linus Torvalds's avatar
Linus Torvalds committed
352
353
354
355
356
			continue;
		case SKF_AD_PKTTYPE:
			A = skb->pkt_type;
			continue;
		case SKF_AD_IFINDEX:
357
358
			if (!skb->dev)
				return 0;
Linus Torvalds's avatar
Linus Torvalds committed
359
360
			A = skb->dev->ifindex;
			continue;
361
362
363
		case SKF_AD_MARK:
			A = skb->mark;
			continue;
364
365
366
		case SKF_AD_QUEUE:
			A = skb->queue_mapping;
			continue;
367
368
369
370
371
		case SKF_AD_HATYPE:
			if (!skb->dev)
				return 0;
			A = skb->dev->type;
			continue;
372
373
374
375
376
377
		case SKF_AD_RXHASH:
			A = skb->rxhash;
			continue;
		case SKF_AD_CPU:
			A = raw_smp_processor_id();
			continue;
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
		case SKF_AD_NLATTR: {
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = nla_find((struct nlattr *)&skb->data[A],
				       skb->len - A, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
		case SKF_AD_NLATTR_NEST: {
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = (struct nlattr *)&skb->data[A];
			if (nla->nla_len > A - skb->len)
				return 0;

			nla = nla_find_nested(nla, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
413
414
415
416
417
418
419
		default:
			return 0;
		}
	}

	return 0;
}
420
EXPORT_SYMBOL(sk_run_filter);
Linus Torvalds's avatar
Linus Torvalds committed
421

422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
/*
 * Security :
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
 * As we dont want to clear mem[] array for each packet going through
 * sk_run_filter(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
 * a malicious user doesnt try to abuse us.
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

Linus Torvalds's avatar
Linus Torvalds committed
482
483
484
485
486
487
488
/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
489
490
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
Linus Torvalds's avatar
Linus Torvalds committed
491
 *
492
493
494
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
Linus Torvalds's avatar
Linus Torvalds committed
495
496
497
 */
int sk_chk_filter(struct sock_filter *filter, int flen)
{
498
499
500
501
502
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 */
	static const u8 codes[] = {
Eric Dumazet's avatar
Eric Dumazet committed
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
548
	};
Linus Torvalds's avatar
Linus Torvalds committed
549
550
	int pc;

551
	if (flen == 0 || flen > BPF_MAXINSNS)
Linus Torvalds's avatar
Linus Torvalds committed
552
553
554
555
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
556
557
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
558

559
560
561
		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
Eric Dumazet's avatar
Eric Dumazet committed
562
		if (!code)
563
			return -EINVAL;
564
		/* Some instructions need special checks */
565
566
		switch (code) {
		case BPF_S_ALU_DIV_K:
567
568
			/* check for division by zero */
			if (ftest->k == 0)
Linus Torvalds's avatar
Linus Torvalds committed
569
				return -EINVAL;
Eric Dumazet's avatar
Eric Dumazet committed
570
			ftest->k = reciprocal_value(ftest->k);
571
			break;
572
573
574
575
576
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
577
578
579
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
580
		case BPF_S_JMP_JA:
581
582
583
584
585
586
587
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned)(flen-pc-1))
				return -EINVAL;
588
589
590
591
592
593
594
595
596
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
597
			/* for conditionals both must be safe */
598
			if (pc + ftest->jt + 1 >= flen ||
599
600
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
601
			break;
602
		}
603
		ftest->code = code;
604
	}
605

606
607
608
609
	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
610
		return check_load_and_stores(filter, flen);
611
612
	}
	return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
613
}
614
EXPORT_SYMBOL(sk_chk_filter);
Linus Torvalds's avatar
Linus Torvalds committed
615

616
/**
 * 	sk_filter_rcu_release - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 *
 * RCU callback: recovers the enclosing sk_filter from its embedded
 * rcu_head and hands it to sk_filter_release().
 */
static void sk_filter_rcu_release(struct rcu_head *rcu)
{
	sk_filter_release(container_of(rcu, struct sk_filter, rcu));
}

/*
 * Give the filter's memory back to the socket's option memory accounting
 * right away, and queue the filter itself for release through
 * call_rcu_bh().
 */
static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
{
	atomic_sub(sk_filter_len(fp), &sk->sk_omem_alloc);
	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
}

Linus Torvalds's avatar
Linus Torvalds committed
635
636
637
638
639
640
641
642
643
644
645
646
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
647
	struct sk_filter *fp, *old_fp;
Linus Torvalds's avatar
Linus Torvalds committed
648
649
650
651
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
652
653
	if (fprog->filter == NULL)
		return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
654
655
656
657
658

	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
659
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
Linus Torvalds's avatar
Linus Torvalds committed
660
661
662
663
664
665
666
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = sk_chk_filter(fp->insns, fp->len);
667
668
669
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
Linus Torvalds's avatar
Linus Torvalds committed
670
671
	}

672
673
	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
674
675
	rcu_assign_pointer(sk->sk_filter, fp);

676
677
	if (old_fp)
		sk_filter_delayed_uncharge(sk, old_fp);
678
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
679
}
680
EXPORT_SYMBOL_GPL(sk_attach_filter);
Linus Torvalds's avatar
Linus Torvalds committed
681

682
683
684
685
686
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

687
688
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
689
690
	if (filter) {
		rcu_assign_pointer(sk->sk_filter, NULL);
691
		sk_filter_delayed_uncharge(sk, filter);
692
693
694
695
		ret = 0;
	}
	return ret;
}
696
EXPORT_SYMBOL_GPL(sk_detach_filter);