/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>

43
44
45
46
47
/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
Linus Torvalds's avatar
Linus Torvalds committed
48
49
50
51
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
52
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
Linus Torvalds's avatar
Linus Torvalds committed
53
	else if (k >= SKF_LL_OFF)
54
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
Linus Torvalds's avatar
Linus Torvalds committed
55

56
	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
Linus Torvalds's avatar
Linus Torvalds committed
57
58
59
60
		return ptr;
	return NULL;
}

/*
 * Return a pointer to @size bytes of packet data at offset @k, or NULL.
 *
 * Negative offsets are handed to bpf_internal_load_pointer_neg_helper();
 * everything else goes through skb_header_pointer(), with @buffer passed
 * along unchanged.
 */
static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k < 0)
		return bpf_internal_load_pointer_neg_helper(skb, k, size);

	return skb_header_pointer(skb, k, size, buffer);
}

Stephen Hemminger's avatar
Stephen Hemminger committed
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to correct size returned by
 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

86
87
88
89
90
91
92
93
	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

Stephen Hemminger's avatar
Stephen Hemminger committed
94
95
96
97
	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

98
99
	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
Stephen Hemminger's avatar
Stephen Hemminger committed
100
	if (filter) {
101
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
102

Stephen Hemminger's avatar
Stephen Hemminger committed
103
104
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
105
	rcu_read_unlock();
Stephen Hemminger's avatar
Stephen Hemminger committed
106
107
108
109
110

	return err;
}
EXPORT_SYMBOL(sk_filter);

Linus Torvalds's avatar
Linus Torvalds committed
111
/**
112
 *	sk_run_filter - run a filter on a socket
Linus Torvalds's avatar
Linus Torvalds committed
113
 *	@skb: buffer to run the filter on
114
 *	@fentry: filter to apply
Linus Torvalds's avatar
Linus Torvalds committed
115
116
 *
 * Decode and apply filter instructions to the skb->data.
Eric Dumazet's avatar
Eric Dumazet committed
117
118
119
120
121
 * Return length to keep, 0 for none. @skb is the data we are
 * filtering, @filter is the array of filter instructions.
 * Because all jumps are guaranteed to be before last instruction,
 * and last instruction guaranteed to be a RET, we dont need to check
 * flen. (We used to pass to this function the length of filter)
Linus Torvalds's avatar
Linus Torvalds committed
122
 */
123
124
unsigned int sk_run_filter(const struct sk_buff *skb,
			   const struct sock_filter *fentry)
Linus Torvalds's avatar
Linus Torvalds committed
125
{
126
	void *ptr;
127
128
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
Linus Torvalds's avatar
Linus Torvalds committed
129
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
130
	u32 tmp;
Linus Torvalds's avatar
Linus Torvalds committed
131
132
133
134
135
	int k;

	/*
	 * Process array of filter instructions.
	 */
Eric Dumazet's avatar
Eric Dumazet committed
136
137
138
139
140
141
	for (;; fentry++) {
#if defined(CONFIG_X86_32)
#define	K (fentry->k)
#else
		const u32 K = fentry->k;
#endif
142

Linus Torvalds's avatar
Linus Torvalds committed
143
		switch (fentry->code) {
144
		case BPF_S_ALU_ADD_X:
Linus Torvalds's avatar
Linus Torvalds committed
145
146
			A += X;
			continue;
147
		case BPF_S_ALU_ADD_K:
Eric Dumazet's avatar
Eric Dumazet committed
148
			A += K;
Linus Torvalds's avatar
Linus Torvalds committed
149
			continue;
150
		case BPF_S_ALU_SUB_X:
Linus Torvalds's avatar
Linus Torvalds committed
151
152
			A -= X;
			continue;
153
		case BPF_S_ALU_SUB_K:
Eric Dumazet's avatar
Eric Dumazet committed
154
			A -= K;
Linus Torvalds's avatar
Linus Torvalds committed
155
			continue;
156
		case BPF_S_ALU_MUL_X:
Linus Torvalds's avatar
Linus Torvalds committed
157
158
			A *= X;
			continue;
159
		case BPF_S_ALU_MUL_K:
Eric Dumazet's avatar
Eric Dumazet committed
160
			A *= K;
Linus Torvalds's avatar
Linus Torvalds committed
161
			continue;
162
		case BPF_S_ALU_DIV_X:
Linus Torvalds's avatar
Linus Torvalds committed
163
164
165
166
			if (X == 0)
				return 0;
			A /= X;
			continue;
167
		case BPF_S_ALU_DIV_K:
168
			A /= K;
Linus Torvalds's avatar
Linus Torvalds committed
169
			continue;
Eric Dumazet's avatar
Eric Dumazet committed
170
171
172
173
174
175
176
177
		case BPF_S_ALU_MOD_X:
			if (X == 0)
				return 0;
			A %= X;
			continue;
		case BPF_S_ALU_MOD_K:
			A %= K;
			continue;
178
		case BPF_S_ALU_AND_X:
Linus Torvalds's avatar
Linus Torvalds committed
179
180
			A &= X;
			continue;
181
		case BPF_S_ALU_AND_K:
Eric Dumazet's avatar
Eric Dumazet committed
182
			A &= K;
Linus Torvalds's avatar
Linus Torvalds committed
183
			continue;
184
		case BPF_S_ALU_OR_X:
Linus Torvalds's avatar
Linus Torvalds committed
185
186
			A |= X;
			continue;
187
		case BPF_S_ALU_OR_K:
Eric Dumazet's avatar
Eric Dumazet committed
188
			A |= K;
Linus Torvalds's avatar
Linus Torvalds committed
189
			continue;
190
191
192
193
194
195
196
		case BPF_S_ANC_ALU_XOR_X:
		case BPF_S_ALU_XOR_X:
			A ^= X;
			continue;
		case BPF_S_ALU_XOR_K:
			A ^= K;
			continue;
197
		case BPF_S_ALU_LSH_X:
Linus Torvalds's avatar
Linus Torvalds committed
198
199
			A <<= X;
			continue;
200
		case BPF_S_ALU_LSH_K:
Eric Dumazet's avatar
Eric Dumazet committed
201
			A <<= K;
Linus Torvalds's avatar
Linus Torvalds committed
202
			continue;
203
		case BPF_S_ALU_RSH_X:
Linus Torvalds's avatar
Linus Torvalds committed
204
205
			A >>= X;
			continue;
206
		case BPF_S_ALU_RSH_K:
Eric Dumazet's avatar
Eric Dumazet committed
207
			A >>= K;
Linus Torvalds's avatar
Linus Torvalds committed
208
			continue;
209
		case BPF_S_ALU_NEG:
Linus Torvalds's avatar
Linus Torvalds committed
210
211
			A = -A;
			continue;
212
		case BPF_S_JMP_JA:
Eric Dumazet's avatar
Eric Dumazet committed
213
			fentry += K;
Linus Torvalds's avatar
Linus Torvalds committed
214
			continue;
215
		case BPF_S_JMP_JGT_K:
Eric Dumazet's avatar
Eric Dumazet committed
216
			fentry += (A > K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
217
			continue;
218
		case BPF_S_JMP_JGE_K:
Eric Dumazet's avatar
Eric Dumazet committed
219
			fentry += (A >= K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
220
			continue;
221
		case BPF_S_JMP_JEQ_K:
Eric Dumazet's avatar
Eric Dumazet committed
222
			fentry += (A == K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
223
			continue;
224
		case BPF_S_JMP_JSET_K:
Eric Dumazet's avatar
Eric Dumazet committed
225
			fentry += (A & K) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
226
			continue;
227
		case BPF_S_JMP_JGT_X:
Eric Dumazet's avatar
Eric Dumazet committed
228
			fentry += (A > X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
229
			continue;
230
		case BPF_S_JMP_JGE_X:
Eric Dumazet's avatar
Eric Dumazet committed
231
			fentry += (A >= X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
232
			continue;
233
		case BPF_S_JMP_JEQ_X:
Eric Dumazet's avatar
Eric Dumazet committed
234
			fentry += (A == X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
235
			continue;
236
		case BPF_S_JMP_JSET_X:
Eric Dumazet's avatar
Eric Dumazet committed
237
			fentry += (A & X) ? fentry->jt : fentry->jf;
Linus Torvalds's avatar
Linus Torvalds committed
238
			continue;
239
		case BPF_S_LD_W_ABS:
Eric Dumazet's avatar
Eric Dumazet committed
240
			k = K;
241
load_w:
242
243
			ptr = load_pointer(skb, k, 4, &tmp);
			if (ptr != NULL) {
244
				A = get_unaligned_be32(ptr);
245
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
246
			}
247
			return 0;
248
		case BPF_S_LD_H_ABS:
Eric Dumazet's avatar
Eric Dumazet committed
249
			k = K;
250
load_h:
251
252
			ptr = load_pointer(skb, k, 2, &tmp);
			if (ptr != NULL) {
253
				A = get_unaligned_be16(ptr);
254
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
255
			}
256
			return 0;
257
		case BPF_S_LD_B_ABS:
Eric Dumazet's avatar
Eric Dumazet committed
258
			k = K;
Linus Torvalds's avatar
Linus Torvalds committed
259
load_b:
260
261
262
263
			ptr = load_pointer(skb, k, 1, &tmp);
			if (ptr != NULL) {
				A = *(u8 *)ptr;
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
264
			}
265
			return 0;
266
		case BPF_S_LD_W_LEN:
267
			A = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
268
			continue;
269
		case BPF_S_LDX_W_LEN:
270
			X = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed
271
			continue;
272
		case BPF_S_LD_W_IND:
Eric Dumazet's avatar
Eric Dumazet committed
273
			k = X + K;
Linus Torvalds's avatar
Linus Torvalds committed
274
			goto load_w;
275
		case BPF_S_LD_H_IND:
Eric Dumazet's avatar
Eric Dumazet committed
276
			k = X + K;
Linus Torvalds's avatar
Linus Torvalds committed
277
			goto load_h;
278
		case BPF_S_LD_B_IND:
Eric Dumazet's avatar
Eric Dumazet committed
279
			k = X + K;
Linus Torvalds's avatar
Linus Torvalds committed
280
			goto load_b;
281
		case BPF_S_LDX_B_MSH:
Eric Dumazet's avatar
Eric Dumazet committed
282
			ptr = load_pointer(skb, K, 1, &tmp);
283
284
285
286
287
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
288
		case BPF_S_LD_IMM:
Eric Dumazet's avatar
Eric Dumazet committed
289
			A = K;
Linus Torvalds's avatar
Linus Torvalds committed
290
			continue;
291
		case BPF_S_LDX_IMM:
Eric Dumazet's avatar
Eric Dumazet committed
292
			X = K;
Linus Torvalds's avatar
Linus Torvalds committed
293
			continue;
294
		case BPF_S_LD_MEM:
295
			A = mem[K];
Linus Torvalds's avatar
Linus Torvalds committed
296
			continue;
297
		case BPF_S_LDX_MEM:
298
			X = mem[K];
Linus Torvalds's avatar
Linus Torvalds committed
299
			continue;
300
		case BPF_S_MISC_TAX:
Linus Torvalds's avatar
Linus Torvalds committed
301
302
			X = A;
			continue;
303
		case BPF_S_MISC_TXA:
Linus Torvalds's avatar
Linus Torvalds committed
304
305
			A = X;
			continue;
306
		case BPF_S_RET_K:
Eric Dumazet's avatar
Eric Dumazet committed
307
			return K;
308
		case BPF_S_RET_A:
309
			return A;
310
		case BPF_S_ST:
Eric Dumazet's avatar
Eric Dumazet committed
311
			mem[K] = A;
Linus Torvalds's avatar
Linus Torvalds committed
312
			continue;
313
		case BPF_S_STX:
Eric Dumazet's avatar
Eric Dumazet committed
314
			mem[K] = X;
Linus Torvalds's avatar
Linus Torvalds committed
315
			continue;
316
		case BPF_S_ANC_PROTOCOL:
Al Viro's avatar
Al Viro committed
317
			A = ntohs(skb->protocol);
Linus Torvalds's avatar
Linus Torvalds committed
318
			continue;
319
		case BPF_S_ANC_PKTTYPE:
Linus Torvalds's avatar
Linus Torvalds committed
320
321
			A = skb->pkt_type;
			continue;
322
		case BPF_S_ANC_IFINDEX:
323
324
			if (!skb->dev)
				return 0;
Linus Torvalds's avatar
Linus Torvalds committed
325
326
			A = skb->dev->ifindex;
			continue;
327
		case BPF_S_ANC_MARK:
328
329
			A = skb->mark;
			continue;
330
		case BPF_S_ANC_QUEUE:
331
332
			A = skb->queue_mapping;
			continue;
333
		case BPF_S_ANC_HATYPE:
334
335
336
337
			if (!skb->dev)
				return 0;
			A = skb->dev->type;
			continue;
338
		case BPF_S_ANC_RXHASH:
339
			A = skb->hash;
340
			continue;
341
		case BPF_S_ANC_CPU:
342
343
			A = raw_smp_processor_id();
			continue;
344
345
346
347
348
349
		case BPF_S_ANC_VLAN_TAG:
			A = vlan_tx_tag_get(skb);
			continue;
		case BPF_S_ANC_VLAN_TAG_PRESENT:
			A = !!vlan_tx_tag_present(skb);
			continue;
350
351
352
		case BPF_S_ANC_PAY_OFFSET:
			A = __skb_get_poff(skb);
			continue;
353
		case BPF_S_ANC_NLATTR: {
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = nla_find((struct nlattr *)&skb->data[A],
				       skb->len - A, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
369
		case BPF_S_ANC_NLATTR_NEST: {
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = (struct nlattr *)&skb->data[A];
			if (nla->nla_len > A - skb->len)
				return 0;

			nla = nla_find_nested(nla, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
388
389
390
391
392
#ifdef CONFIG_SECCOMP_FILTER
		case BPF_S_ANC_SECCOMP_LD_W:
			A = seccomp_bpf_load(fentry->k);
			continue;
#endif
Linus Torvalds's avatar
Linus Torvalds committed
393
		default:
Joe Perches's avatar
Joe Perches committed
394
395
396
			WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
				       fentry->code, fentry->jt,
				       fentry->jf, fentry->k);
Linus Torvalds's avatar
Linus Torvalds committed
397
398
399
400
401
402
			return 0;
		}
	}

	return 0;
}
403
EXPORT_SYMBOL(sk_run_filter);
Linus Torvalds's avatar
Linus Torvalds committed
404

405
406
407
408
409
410
411
/*
 * Security :
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
 * As we dont want to clear mem[] array for each packet going through
 * sk_run_filter(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
Lucas De Marchi's avatar
Lucas De Marchi committed
412
 * a malicious user doesn't try to abuse us.
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

Linus Torvalds's avatar
Linus Torvalds committed
465
466
467
468
469
470
471
/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
472
473
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
Linus Torvalds's avatar
Linus Torvalds committed
474
 *
475
476
477
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
Linus Torvalds's avatar
Linus Torvalds committed
478
 */
479
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
Linus Torvalds's avatar
Linus Torvalds committed
480
{
481
482
483
484
485
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 */
	static const u8 codes[] = {
Eric Dumazet's avatar
Eric Dumazet committed
486
487
488
489
490
491
492
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
Eric Dumazet's avatar
Eric Dumazet committed
493
494
		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
Eric Dumazet's avatar
Eric Dumazet committed
495
496
497
498
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
499
500
		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
Eric Dumazet's avatar
Eric Dumazet committed
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
535
	};
Linus Torvalds's avatar
Linus Torvalds committed
536
	int pc;
537
	bool anc_found;
Linus Torvalds's avatar
Linus Torvalds committed
538

539
	if (flen == 0 || flen > BPF_MAXINSNS)
Linus Torvalds's avatar
Linus Torvalds committed
540
541
542
543
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
544
545
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
546

547
548
549
		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
Eric Dumazet's avatar
Eric Dumazet committed
550
		if (!code)
551
			return -EINVAL;
552
		/* Some instructions need special checks */
553
554
		switch (code) {
		case BPF_S_ALU_DIV_K:
Eric Dumazet's avatar
Eric Dumazet committed
555
556
557
558
559
		case BPF_S_ALU_MOD_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
560
561
562
563
564
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
565
566
567
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
568
		case BPF_S_JMP_JA:
569
570
571
572
573
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
574
			if (ftest->k >= (unsigned int)(flen-pc-1))
575
				return -EINVAL;
576
577
578
579
580
581
582
583
584
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
585
			/* for conditionals both must be safe */
586
			if (pc + ftest->jt + 1 >= flen ||
587
588
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
589
			break;
590
591
592
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
593
			anc_found = false;
594
595
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
				code = BPF_S_ANC_##CODE;	\
596
				anc_found = true;		\
597
598
599
600
601
602
603
604
605
606
607
608
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
Jiri Pirko's avatar
Jiri Pirko committed
609
			ANCILLARY(ALU_XOR_X);
610
611
			ANCILLARY(VLAN_TAG);
			ANCILLARY(VLAN_TAG_PRESENT);
612
			ANCILLARY(PAY_OFFSET);
613
			}
614
615
616
617

			/* ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
618
		}
619
		ftest->code = code;
620
	}
621

622
623
624
625
	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
626
		return check_load_and_stores(filter, flen);
627
628
	}
	return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
629
}
630
EXPORT_SYMBOL(sk_chk_filter);
Linus Torvalds's avatar
Linus Torvalds committed
631

632
/**
633
 * 	sk_filter_release_rcu - Release a socket filter by rcu_head
634
635
 *	@rcu: rcu_head that contains the sk_filter to free
 */
636
void sk_filter_release_rcu(struct rcu_head *rcu)
637
638
639
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

640
	bpf_jit_free(fp);
641
}
642
EXPORT_SYMBOL(sk_filter_release_rcu);
643

644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
static int __sk_prepare_filter(struct sk_filter *fp)
{
	int err;

	fp->bpf_func = sk_run_filter;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err)
		return err;

	bpf_jit_compile(fp);
	return 0;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@fprog: the filter program
661
 *	@pfp: the unattached filter that is created
662
 *
663
 * Create a filter independent of any socket. We first run some
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	struct sk_filter *fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

679
	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
	if (!fp)
		return -ENOMEM;
	memcpy(fp->insns, fprog->filter, fsize);

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = __sk_prepare_filter(fp);
	if (err)
		goto free_mem;

	*pfp = fp;
	return 0;
free_mem:
	kfree(fp);
	return err;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

/**
 *	sk_unattached_filter_destroy - release an unattached filter
 *	@fp: the filter to release
 *
 * Passes @fp to sk_filter_release().
 */
void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);

Linus Torvalds's avatar
Linus Torvalds committed
705
706
707
708
709
710
711
712
713
714
715
716
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
717
	struct sk_filter *fp, *old_fp;
Linus Torvalds's avatar
Linus Torvalds committed
718
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
719
	unsigned int sk_fsize = sk_filter_size(fprog->len);
Linus Torvalds's avatar
Linus Torvalds committed
720
721
	int err;

722
723
724
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

Linus Torvalds's avatar
Linus Torvalds committed
725
	/* Make sure new filter is there and in the right amounts. */
726
727
	if (fprog->filter == NULL)
		return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
728

729
	fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
Linus Torvalds's avatar
Linus Torvalds committed
730
731
732
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
733
		sock_kfree_s(sk, fp, sk_fsize);
Linus Torvalds's avatar
Linus Torvalds committed
734
735
736
737
738
739
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

740
	err = __sk_prepare_filter(fp);
741
742
743
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
Linus Torvalds's avatar
Linus Torvalds committed
744
745
	}

746
747
	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
748
749
	rcu_assign_pointer(sk->sk_filter, fp);

750
	if (old_fp)
751
		sk_filter_uncharge(sk, old_fp);
752
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
753
}
754
EXPORT_SYMBOL_GPL(sk_attach_filter);
Linus Torvalds's avatar
Linus Torvalds committed
755

756
757
758
759
760
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

761
762
763
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

764
765
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
766
	if (filter) {
767
		RCU_INIT_POINTER(sk->sk_filter, NULL);
768
		sk_filter_uncharge(sk, filter);
769
770
771
772
		ret = 0;
	}
	return ret;
}
773
EXPORT_SYMBOL_GPL(sk_detach_filter);
774

775
void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
{
	static const u16 decodes[] = {
		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
815
		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
		[BPF_S_RET_K]		= BPF_RET|BPF_K,
		[BPF_S_RET_A]		= BPF_RET|BPF_A,
		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
		[BPF_S_ST]		= BPF_ST,
		[BPF_S_STX]		= BPF_STX,
		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
	};
	u16 code;

	code = filt->code;

	to->code = decodes[code];
	to->jt = filt->jt;
	to->jf = filt->jf;
850
	to->k = filt->k;
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
}

/*
 * Copy the filter attached to @sk back to user space in decoded form.
 *
 * @sk:   socket whose filter is read; lock_sock() is held for the call
 * @ubuf: user buffer receiving the instructions
 * @len:  capacity of @ubuf in instructions; 0 only queries the length
 *
 * Returns 0 if no filter is attached, the number of instructions in the
 * filter when @len is 0 or after a successful copy, -EINVAL if @len is
 * smaller than the filter, or -EFAULT if a copy to user space fails.
 */
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
{
	struct sk_filter *filter;
	int i, ret;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
			sock_owned_by_user(sk));
	if (!filter) {
		ret = 0;
		goto out;
	}

	/* a zero-length request only asks how big the filter is */
	if (!len) {
		ret = filter->len;
		goto out;
	}

	if (len < filter->len) {
		ret = -EINVAL;
		goto out;
	}

	for (i = 0; i < filter->len; i++) {
		struct sock_filter decoded;

		sk_decode_filter(&filter->insns[i], &decoded);
		if (copy_to_user(&ubuf[i], &decoded, sizeof(decoded))) {
			ret = -EFAULT;
			goto out;
		}
	}

	ret = filter->len;
out:
	release_sock(sk);
	return ret;
}