/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		RAW - implementation of IP "raw" sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() fixed up
 *		Alan Cox	:	ICMP error handling
 *		Alan Cox	:	EMSGSIZE if you send too big a packet
 *		Alan Cox	: 	Now uses generic datagrams and shared
 *					skbuff library. No more peek crashes,
 *					no more backlogs
 *		Alan Cox	:	Checks sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram/skb_copy_datagram
 *		Alan Cox	:	Raw passes ip options too
 *		Alan Cox	:	Setsocketopt added
 *		Alan Cox	:	Fixed error return for broadcasts
 *		Alan Cox	:	Removed wake_up calls
 *		Alan Cox	:	Use ttl/tos
 *		Alan Cox	:	Cleaned up old debugging
 *		Alan Cox	:	Use new kernel side addresses
 *	Arnt Gulbrandsen	:	Fixed MSG_DONTROUTE in raw sockets.
 *		Alan Cox	:	BSD style RAW socket demultiplexing.
 *		Alan Cox	:	Beginnings of mrouted support.
 *		Alan Cox	:	Added IP_HDRINCL option.
 *		Alan Cox	:	Skip broadcast check if BSDism set.
 *		David S. Miller	:	New socket lookup architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

40
#include <linux/types.h>
Arun Sharma's avatar
Arun Sharma committed
41
#include <linux/atomic.h>
Linus Torvalds's avatar
Linus Torvalds committed
42
43
44
45
46
47
48
49
#include <asm/byteorder.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel.h>
50
#include <linux/export.h>
Linus Torvalds's avatar
Linus Torvalds committed
51
52
53
54
55
56
57
58
59
#include <linux/spinlock.h>
#include <linux/sockios.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/mroute.h>
#include <linux/netdevice.h>
#include <linux/in_route.h>
#include <linux/route.h>
#include <linux/skbuff.h>
60
#include <linux/igmp.h>
61
#include <net/net_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
62
63
64
65
66
67
68
69
70
#include <net/dst.h>
#include <net/sock.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/snmp.h>
71
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
72
73
74
75
76
77
78
79
#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
80
#include <linux/compat.h>
81
82
83
#include <linux/uio.h>

struct raw_frag_vec {
84
	struct msghdr *msg;
85
86
87
88
89
90
	union {
		struct icmphdr icmph;
		char c[1];
	} hdr;
	int hlen;
};
Linus Torvalds's avatar
Linus Torvalds committed
91

92
static struct raw_hashinfo raw_v4_hashinfo = {
93
	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
94
};
Linus Torvalds's avatar
Linus Torvalds committed
95

96
void raw_hash_sk(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
97
{
98
	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
99
	struct hlist_head *head;
Linus Torvalds's avatar
Linus Torvalds committed
100

101
	head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
102
103

	write_lock_bh(&h->lock);
Linus Torvalds's avatar
Linus Torvalds committed
104
	sk_add_node(sk, head);
105
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
106
107
108
109
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_hash_sk);

110
void raw_unhash_sk(struct sock *sk)
111
{
112
113
	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;

114
115
	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
116
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
117
118
119
120
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);

121
122
/*
 * Walk the hash chain starting at @sk (inclusive) and return the first
 * socket that matches, or NULL when the chain is exhausted.
 *
 * A socket matches when it lives in @net, its inet_num equals @num, and each
 * of its remote address, bound local address and bound device is either
 * unset (zero, i.e. wildcard) or equal to @raddr, @laddr and @dif.
 */
static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
		unsigned short num, __be32 raddr, __be32 laddr, int dif)
{
	sk_for_each_from(sk) {
		struct inet_sock *inet = inet_sk(sk);

		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&&
		    !(inet->inet_daddr && inet->inet_daddr != raddr)	&&
		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
			return sk;
	}
	return NULL;
}

/*
 *	0 - deliver
 *	1 - block
 */
Eric Dumazet's avatar
Eric Dumazet committed
142
static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
143
{
Eric Dumazet's avatar
Eric Dumazet committed
144
145
	struct icmphdr _hdr;
	const struct icmphdr *hdr;
Linus Torvalds's avatar
Linus Torvalds committed
146

Eric Dumazet's avatar
Eric Dumazet committed
147
148
149
	hdr = skb_header_pointer(skb, skb_transport_offset(skb),
				 sizeof(_hdr), &_hdr);
	if (!hdr)
Linus Torvalds's avatar
Linus Torvalds committed
150
151
		return 1;

Eric Dumazet's avatar
Eric Dumazet committed
152
	if (hdr->type < 32) {
Linus Torvalds's avatar
Linus Torvalds committed
153
154
		__u32 data = raw_sk(sk)->filter.data;

Eric Dumazet's avatar
Eric Dumazet committed
155
		return ((1U << hdr->type) & data) != 0;
Linus Torvalds's avatar
Linus Torvalds committed
156
157
158
159
160
161
162
163
164
165
166
167
	}

	/* Do not block unknown ICMP types */
	return 0;
}

/* IP input processing comes here for RAW socket delivery.
 * Caller owns SKB, so we must make clones.
 *
 * RFC 1122: SHOULD pass TOS value up to the transport layer.
 * -> It does. And not only TOS, but all IP header.
 */
168
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
Linus Torvalds's avatar
Linus Torvalds committed
169
170
171
{
	struct sock *sk;
	struct hlist_head *head;
172
	int delivered = 0;
173
	struct net *net;
Linus Torvalds's avatar
Linus Torvalds committed
174

175
176
	read_lock(&raw_v4_hashinfo.lock);
	head = &raw_v4_hashinfo.ht[hash];
Linus Torvalds's avatar
Linus Torvalds committed
177
178
	if (hlist_empty(head))
		goto out;
179

180
	net = dev_net(skb->dev);
181
	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
Linus Torvalds's avatar
Linus Torvalds committed
182
183
184
185
			     iph->saddr, iph->daddr,
			     skb->dev->ifindex);

	while (sk) {
186
		delivered = 1;
187
188
189
		if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
		    ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
				   skb->dev->ifindex)) {
Linus Torvalds's avatar
Linus Torvalds committed
190
191
192
193
194
195
			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

			/* Not releasing hash table! */
			if (clone)
				raw_rcv(sk, clone);
		}
196
		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
Linus Torvalds's avatar
Linus Torvalds committed
197
198
199
200
				     iph->saddr, iph->daddr,
				     skb->dev->ifindex);
	}
out:
201
	read_unlock(&raw_v4_hashinfo.lock);
202
	return delivered;
Linus Torvalds's avatar
Linus Torvalds committed
203
204
}

205
206
207
208
209
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
	int hash;
	struct sock *raw_sk;

210
211
	hash = protocol & (RAW_HTABLE_SIZE - 1);
	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
212
213
214
215
216
217
218
219
220
221
222
223

	/* If there maybe a raw socket we must check - if not we
	 * don't care less
	 */
	if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
		raw_sk = NULL;

	return raw_sk != NULL;

}

/*
 * Apply one ICMP error to one raw socket.
 *
 * FRAG_NEEDED updates the socket's path MTU and then continues with error
 * reporting; REDIRECT updates the route and returns.  For everything else
 * the ICMP type/code is translated into an errno.  With recverr set, the
 * error is queued through ip_icmp_error().  sk_err is set and the socket's
 * error callback invoked when recverr is set or the error is hard.
 */
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
{
	struct inet_sock *inet = inet_sk(sk);
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int err = 0;
	int harderr = 0;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
		ipv4_sk_update_pmtu(skb, sk, info);
	else if (type == ICMP_REDIRECT) {
		ipv4_sk_redirect(skb, sk);
		return;
	}

	/* Report error on raw socket, if:
	   1. User requested ip_recverr.
	   2. Socket is connected (otherwise the error indication
	      is useless without ip_recverr) and error is hard.
	 */
	if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
		return;

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		return;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		err = EHOSTUNREACH;
		/* Codes beyond the conversion table keep EHOSTUNREACH. */
		if (code > NR_ICMP_UNREACH)
			break;
		err = icmp_err_convert[code].errno;
		harderr = icmp_err_convert[code].fatal;
		if (code == ICMP_FRAG_NEEDED) {
			harderr = inet->pmtudisc != IP_PMTUDISC_DONT;
			err = EMSGSIZE;
		}
	}

	if (inet->recverr) {
		const struct iphdr *iph = (const struct iphdr *)skb->data;
		u8 *payload = skb->data + (iph->ihl << 2);

		/* With IP_HDRINCL the reported payload starts at the IP header. */
		if (inet->hdrincl)
			payload = skb->data;
		ip_icmp_error(sk, skb, err, 0, info, payload);
	}

	if (inet->recverr || harderr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	}
}

284
285
286
287
/*
 * Dispatch an ICMP error to every raw socket hashed under @protocol that
 * matches the IP header at skb->data.  That header's daddr is passed as the
 * lookup's remote address and its saddr as the local address.  The chain is
 * walked under the hash table read lock.
 */
void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
{
	int hash;
	struct sock *raw_sk;
	const struct iphdr *iph;
	struct net *net;

	hash = protocol & (RAW_HTABLE_SIZE - 1);

	read_lock(&raw_v4_hashinfo.lock);
	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
	if (raw_sk) {
		iph = (const struct iphdr *)skb->data;
		net = dev_net(skb->dev);

		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
						iph->daddr, iph->saddr,
						skb->dev->ifindex)) != NULL) {
			raw_err(raw_sk, skb, info);
			raw_sk = sk_next(raw_sk);
			/* Re-read the header pointer after each raw_err() call. */
			iph = (const struct iphdr *)skb->data;
		}
	}
	read_unlock(&raw_v4_hashinfo.lock);
}

Daniel Baluta's avatar
Daniel Baluta committed
310
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
311
312
{
	/* Charge it to the socket. */
313

314
	ipv4_pktinfo_prepare(sk, skb);
315
	if (sock_queue_rcv_skb(sk, skb) < 0) {
Linus Torvalds's avatar
Linus Torvalds committed
316
317
318
319
320
321
322
323
324
325
		kfree_skb(skb);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}

int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
Wang Chen's avatar
Wang Chen committed
326
		atomic_inc(&sk->sk_drops);
Linus Torvalds's avatar
Linus Torvalds committed
327
328
329
		kfree_skb(skb);
		return NET_RX_DROP;
	}
330
	nf_reset(skb);
Linus Torvalds's avatar
Linus Torvalds committed
331

332
	skb_push(skb, skb->data - skb_network_header(skb));
Linus Torvalds's avatar
Linus Torvalds committed
333
334
335
336
337

	raw_rcv_skb(sk, skb);
	return 0;
}

338
/*
 * Transmit a user-supplied datagram that already contains its IP header.
 *
 * @length bytes are copied from @msg into a fresh skb.  On success the
 * route reference in *@rtp is handed to the skb and *@rtp is set to NULL.
 * If the header is at least a full struct iphdr, then saddr (when zero),
 * tot_len, id (when zero) and the checksum are filled in.
 *
 * Returns 0 on success or for MSG_PROBE, otherwise a negative errno:
 * -EMSGSIZE if @length exceeds the device MTU, -EINVAL if @length is
 * shorter than an IP header or ihl points past the data, -EFAULT on copy
 * failure.  -ENOBUFS is suppressed unless the socket has recverr set.
 */
static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
			   struct msghdr *msg, size_t length,
			   struct rtable **rtp,
			   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct iphdr *iph;
	struct sk_buff *skb;
	unsigned int iphlen;
	int err;
	struct rtable *rt = *rtp;
	int hlen, tlen;

	if (length > rt->dst.dev->mtu) {
		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
			       rt->dst.dev->mtu);
		return -EMSGSIZE;
	}
	/*
	 * The code below reads iph->ihl and iph->protocol from the copied
	 * data; with fewer bytes than an IP header those fields would be
	 * uninitialised skb memory.
	 */
	if (length < sizeof(struct iphdr))
		return -EINVAL;

	if (flags&MSG_PROBE)
		goto out;

	hlen = LL_RESERVED_SPACE(rt->dst.dev);
	tlen = rt->dst.dev->needed_tailroom;
	skb = sock_alloc_send_skb(sk,
				  length + hlen + tlen + 15,
				  flags & MSG_DONTWAIT, &err);
	if (!skb)
		goto error;
	skb_reserve(skb, hlen);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb_dst_set(skb, &rt->dst);
	/* The skb now owns the route reference. */
	*rtp = NULL;

	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	skb_put(skb, length);

	skb->ip_summed = CHECKSUM_NONE;

	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);

	skb->transport_header = skb->network_header;
	err = -EFAULT;
	if (memcpy_from_msg(iph, msg, length))
		goto error_free;

	iphlen = iph->ihl * 4;

	/*
	 * We don't want to modify the ip header, but we do need to
	 * be sure that it won't cause problems later along the network
	 * stack.  Specifically we want to make sure that iph->ihl is a
	 * sane value.  If ihl points beyond the length of the buffer passed
	 * in, reject the frame as invalid
	 */
	err = -EINVAL;
	if (iphlen > length)
		goto error_free;

	if (iphlen >= sizeof(*iph)) {
		if (!iph->saddr)
			iph->saddr = fl4->saddr;
		iph->check   = 0;
		iph->tot_len = htons(length);
		if (!iph->id)
			ip_select_ident(net, skb, NULL);

		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	}
	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
		      net, sk, skb, NULL, rt->dst.dev,
		      dst_output);
	if (err > 0)
		err = net_xmit_errno(err);
	if (err)
		goto error;
out:
	return 0;

error_free:
	kfree_skb(skb);
error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	if (err == -ENOBUFS && !inet->recverr)
		err = 0;
	return err;
}

433
static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4)
Linus Torvalds's avatar
Linus Torvalds committed
434
{
435
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
436

437
	if (fl4->flowi4_proto != IPPROTO_ICMP)
Heiko Carstens's avatar
Heiko Carstens committed
438
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
439

440
	/* We only need the first two bytes. */
441
442
	rfv->hlen = 2;

443
	err = memcpy_from_msg(rfv->hdr.c, rfv->msg, rfv->hlen);
444
445
446
	if (err)
		return err;

447
448
	fl4->fl4_icmp_type = rfv->hdr.icmph.type;
	fl4->fl4_icmp_code = rfv->hdr.icmph.code;
449

Heiko Carstens's avatar
Heiko Carstens committed
450
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
451
452
}

453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
/*
 * getfrag callback passed to ip_append_data() by raw_sendmsg().
 *
 * @from is a struct raw_frag_vec.  Bytes at offsets below rfv->hlen are
 * served from the prefix held in rfv->hdr, folding them into skb->csum
 * unless the skb uses CHECKSUM_PARTIAL.  Anything beyond the prefix is
 * fetched from the message via ip_generic_getfrag(), with the offset
 * rebased by rfv->hlen.
 */
static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
		       struct sk_buff *skb)
{
	struct raw_frag_vec *rfv = from;

	if (offset < rfv->hlen) {
		int copy = min(rfv->hlen - offset, len);

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			memcpy(to, rfv->hdr.c + offset, copy);
		else
			skb->csum = csum_block_add(
				skb->csum,
				csum_partial_copy_nocheck(rfv->hdr.c + offset,
							  to, copy, 0),
				odd);

		odd = 0;
		offset += copy;
		to += copy;
		len -= copy;

		if (!len)
			return 0;
	}

	offset -= rfv->hlen;

	return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
}

484
/*
 * sendmsg() for IPv4 raw sockets.
 *
 * Resolves the destination (from msg_name, or from the connected address),
 * applies control-message and socket IP options, routes the flow, and then
 * sends either through raw_send_hdrinc() (IP_HDRINCL) or through
 * ip_append_data()/ip_push_pending_frames().
 *
 * Returns @len on success or a negative errno.
 */
static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	int free = 0;
	__be32 daddr;
	__be32 saddr;
	u8  tos;
	int err;
	struct ip_options_data opt_copy;
	struct raw_frag_vec rfv;
	int hdrincl;

	err = -EMSGSIZE;
	if (len > 0xFFFF)
		goto out;

	/*
	 * Snapshot hdrincl once.  It selects both whether rfv is initialised
	 * and which send path is taken; re-reading it could let a concurrent
	 * change pair the non-hdrincl path with an uninitialised rfv.
	 */
	hdrincl = inet->hdrincl;

	/*
	 *	Check the flags.
	 */

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
		goto out;               /* compatibility */

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_namelen) {
		DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(*usin))
			goto out;
		if (usin->sin_family != AF_INET) {
			pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n",
				     __func__, current->comm);
			err = -EAFNOSUPPORT;
			if (usin->sin_family)
				goto out;
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		err = -EDESTADDRREQ;
		if (sk->sk_state != TCP_ESTABLISHED)
			goto out;
		daddr = inet->inet_daddr;
	}

	ipc.addr = inet->inet_saddr;
	ipc.opt = NULL;
	ipc.tx_flags = 0;
	ipc.ttl = 0;
	ipc.tos = -1;
	ipc.oif = sk->sk_bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
		if (err) {
			/* Options may have been allocated before the failure. */
			kfree(ipc.opt);
			goto out;
		}
		if (ipc.opt)
			free = 1;
	}

	saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt) {
		struct ip_options_rcu *inet_opt;

		rcu_read_lock();
		inet_opt = rcu_dereference(inet->inet_opt);
		if (inet_opt) {
			memcpy(&opt_copy, inet_opt,
			       sizeof(*inet_opt) + inet_opt->opt.optlen);
			ipc.opt = &opt_copy.opt;
		}
		rcu_read_unlock();
	}

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (hdrincl)
			goto done;
		if (ipc.opt->opt.srr) {
			if (!daddr)
				goto done;
			daddr = ipc.opt->opt.faddr;
		}
	}
	tos = get_rtconn_flags(&ipc, sk);
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	} else if (!ipc.oif)
		ipc.oif = inet->uc_index;

	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
			   RT_SCOPE_UNIVERSE,
			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk) |
			    (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
			   daddr, saddr, 0, 0);

	if (!hdrincl) {
		rfv.msg = msg;
		rfv.hlen = 0;

		err = raw_probe_proto_opt(&rfv, &fl4);
		if (err)
			goto done;
	}

	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
	rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto done;
	}

	err = -EACCES;
	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
		goto done;

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	if (hdrincl)
		err = raw_send_hdrinc(sk, &fl4, msg, len,
				      &rt, msg->msg_flags);

	 else {
		sock_tx_timestamp(sk, &ipc.tx_flags);

		if (!ipc.addr)
			ipc.addr = fl4.daddr;
		lock_sock(sk);
		err = ip_append_data(sk, &fl4, raw_getfrag,
				     &rfv, len, 0,
				     &ipc, &rt, msg->msg_flags);
		if (err)
			ip_flush_pending_frames(sk);
		else if (!(msg->msg_flags & MSG_MORE)) {
			err = ip_push_pending_frames(sk, &fl4);
			if (err == -ENOBUFS && !inet->recverr)
				err = 0;
		}
		release_sock(sk);
	}
done:
	if (free)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:
	if (err < 0)
		return err;
	return len;

do_confirm:
	dst_confirm(&rt->dst);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto done;
}

static void raw_close(struct sock *sk, long timeout)
{
668
	/*
Lucas De Marchi's avatar
Lucas De Marchi committed
669
	 * Raw sockets may have direct kernel references. Kill them.
Linus Torvalds's avatar
Linus Torvalds committed
670
671
672
673
674
675
	 */
	ip_ra_control(sk, 0, NULL);

	sk_common_release(sk);
}

676
/* Destroy hook: discard any pending frames while holding the socket lock. */
static void raw_destroy(struct sock *sk)
{
	lock_sock(sk);
	ip_flush_pending_frames(sk);
	release_sock(sk);
}

Linus Torvalds's avatar
Linus Torvalds committed
683
684
685
686
687
688
689
690
691
692
/* This gets rid of all the nasties in af_inet. -DaveM */
static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
	int ret = -EINVAL;
	int chk_addr_ret;

	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
		goto out;
693
	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
Linus Torvalds's avatar
Linus Torvalds committed
694
695
696
697
	ret = -EADDRNOTAVAIL;
	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
		goto out;
698
	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
Linus Torvalds's avatar
Linus Torvalds committed
699
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
700
		inet->inet_saddr = 0;  /* Use device */
Linus Torvalds's avatar
Linus Torvalds committed
701
702
703
704
705
706
707
708
709
710
	sk_dst_reset(sk);
	ret = 0;
out:	return ret;
}

/*
 *	This should be easy, if there is something there
 *	we return it, otherwise we block.
 */

711
712
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		       int noblock, int flags, int *addr_len)
Linus Torvalds's avatar
Linus Torvalds committed
713
714
715
716
{
	struct inet_sock *inet = inet_sk(sk);
	size_t copied = 0;
	int err = -EOPNOTSUPP;
717
	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
Linus Torvalds's avatar
Linus Torvalds committed
718
719
720
721
722
723
	struct sk_buff *skb;

	if (flags & MSG_OOB)
		goto out;

	if (flags & MSG_ERRQUEUE) {
724
		err = ip_recv_error(sk, msg, len, addr_len);
Linus Torvalds's avatar
Linus Torvalds committed
725
726
727
728
729
730
731
732
733
734
735
736
737
		goto out;
	}

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		goto out;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

738
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
Linus Torvalds's avatar
Linus Torvalds committed
739
740
741
	if (err)
		goto done;

742
	sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
743
744
745
746

	/* Copy the address. */
	if (sin) {
		sin->sin_family = AF_INET;
747
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
748
		sin->sin_port = 0;
Linus Torvalds's avatar
Linus Torvalds committed
749
		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
750
		*addr_len = sizeof(*sin);
Linus Torvalds's avatar
Linus Torvalds committed
751
752
753
754
755
756
757
	}
	if (inet->cmsg_flags)
		ip_cmsg_recv(msg, skb);
	if (flags & MSG_TRUNC)
		copied = skb->len;
done:
	skb_free_datagram(sk, skb);
758
759
760
761
out:
	if (err)
		return err;
	return copied;
Linus Torvalds's avatar
Linus Torvalds committed
762
763
764
765
766
767
}

static int raw_init(struct sock *sk)
{
	struct raw_sock *rp = raw_sk(sk);

768
	if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
Linus Torvalds's avatar
Linus Torvalds committed
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
		memset(&rp->filter, 0, sizeof(rp->filter));
	return 0;
}

/*
 * Install the socket's ICMP filter from user memory.  At most
 * sizeof(struct icmp_filter) bytes are copied.  Returns 0 or -EFAULT.
 */
static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen)
{
	struct raw_sock *rp = raw_sk(sk);

	/* Never copy more than the filter can hold. */
	if (optlen > sizeof(struct icmp_filter))
		optlen = sizeof(struct icmp_filter);

	return copy_from_user(&rp->filter, optval, optlen) ? -EFAULT : 0;
}

/*
 * Copy the socket's ICMP filter to user memory.  The length requested in
 * *@optlen is clamped to sizeof(struct icmp_filter) and written back.
 * Returns 0, -EINVAL for a negative length, or -EFAULT on a failed user
 * access.
 */
static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen)
{
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;
	if (len > sizeof(struct icmp_filter))
		len = sizeof(struct icmp_filter);
	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &raw_sk(sk)->filter, len))
		return -EFAULT;
	return 0;
}

801
static int do_raw_setsockopt(struct sock *sk, int level, int optname,
802
			  char __user *optval, unsigned int optlen)
Linus Torvalds's avatar
Linus Torvalds committed
803
804
{
	if (optname == ICMP_FILTER) {
805
		if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
Linus Torvalds's avatar
Linus Torvalds committed
806
807
808
809
810
811
812
			return -EOPNOTSUPP;
		else
			return raw_seticmpfilter(sk, optval, optlen);
	}
	return -ENOPROTOOPT;
}

813
static int raw_setsockopt(struct sock *sk, int level, int optname,
814
			  char __user *optval, unsigned int optlen)
Linus Torvalds's avatar
Linus Torvalds committed
815
816
{
	if (level != SOL_RAW)
817
818
819
		return ip_setsockopt(sk, level, optname, optval, optlen);
	return do_raw_setsockopt(sk, level, optname, optval, optlen);
}
Linus Torvalds's avatar
Linus Torvalds committed
820

821
822
#ifdef CONFIG_COMPAT
/* Compat setsockopt: SOL_RAW is handled here, other levels by compat IP. */
static int compat_raw_setsockopt(struct sock *sk, int level, int optname,
				 char __user *optval, unsigned int optlen)
{
	if (level != SOL_RAW)
		return compat_ip_setsockopt(sk, level, optname, optval, optlen);
	return do_raw_setsockopt(sk, level, optname, optval, optlen);
}
#endif

/*
 * SOL_RAW getsockopt handler.  Only ICMP_FILTER is recognised, and only on
 * sockets whose protocol is ICMP (-EOPNOTSUPP otherwise).  Any other option
 * name yields -ENOPROTOOPT.
 */
static int do_raw_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (optname == ICMP_FILTER) {
		if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
			return -EOPNOTSUPP;
		else
			return raw_geticmpfilter(sk, optval, optlen);
	}
	return -ENOPROTOOPT;
}

843
844
845
846
847
848
849
850
851
852
/* getsockopt entry point: SOL_RAW is handled here, other levels by IP. */
static int raw_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level == SOL_RAW)
		return do_raw_getsockopt(sk, level, optname, optval, optlen);

	return ip_getsockopt(sk, level, optname, optval, optlen);
}

#ifdef CONFIG_COMPAT
/* Compat getsockopt: SOL_RAW is handled here, other levels by compat IP. */
static int compat_raw_getsockopt(struct sock *sk, int level, int optname,
				 char __user *optval, int __user *optlen)
{
	if (level != SOL_RAW)
		return compat_ip_getsockopt(sk, level, optname, optval, optlen);
	return do_raw_getsockopt(sk, level, optname, optval, optlen);
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
861
862
863
static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch (cmd) {
Joe Perches's avatar
Joe Perches committed
864
865
	case SIOCOUTQ: {
		int amount = sk_wmem_alloc_get(sk);
866

Joe Perches's avatar
Joe Perches committed
867
868
869
870
871
872
873
874
		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ: {
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
875
		if (skb)
Joe Perches's avatar
Joe Perches committed
876
877
878
879
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
Linus Torvalds's avatar
Linus Torvalds committed
880

Joe Perches's avatar
Joe Perches committed
881
	default:
Linus Torvalds's avatar
Linus Torvalds committed
882
#ifdef CONFIG_IP_MROUTE
Joe Perches's avatar
Joe Perches committed
883
		return ipmr_ioctl(sk, cmd, (void __user *)arg);
Linus Torvalds's avatar
Linus Torvalds committed
884
#else
Joe Perches's avatar
Joe Perches committed
885
		return -ENOIOCTLCMD;
Linus Torvalds's avatar
Linus Torvalds committed
886
887
888
889
#endif
	}
}

890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl handler.  SIOCOUTQ and SIOCINQ return -ENOIOCTLCMD here;
 * other commands go to ipmr_compat_ioctl() when CONFIG_IP_MROUTE is set and
 * are -ENOIOCTLCMD otherwise.
 */
static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	if (cmd == SIOCOUTQ || cmd == SIOCINQ)
		return -ENOIOCTLCMD;

#ifdef CONFIG_IP_MROUTE
	return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
#else
	return -ENOIOCTLCMD;
#endif
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
907
struct proto raw_prot = {
908
909
910
	.name		   = "RAW",
	.owner		   = THIS_MODULE,
	.close		   = raw_close,
Denis V. Lunev's avatar
Denis V. Lunev committed
911
	.destroy	   = raw_destroy,
912
913
914
915
916
917
918
919
920
921
	.connect	   = ip4_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = raw_ioctl,
	.init		   = raw_init,
	.setsockopt	   = raw_setsockopt,
	.getsockopt	   = raw_getsockopt,
	.sendmsg	   = raw_sendmsg,
	.recvmsg	   = raw_recvmsg,
	.bind		   = raw_bind,
	.backlog_rcv	   = raw_rcv_skb,
922
	.release_cb	   = ip4_datagram_release_cb,
923
924
	.hash		   = raw_hash_sk,
	.unhash		   = raw_unhash_sk,
925
	.obj_size	   = sizeof(struct raw_sock),
926
	.h.raw_hash	   = &raw_v4_hashinfo,
927
#ifdef CONFIG_COMPAT
928
929
	.compat_setsockopt = compat_raw_setsockopt,
	.compat_getsockopt = compat_raw_getsockopt,
930
	.compat_ioctl	   = compat_raw_ioctl,
931
#endif
Linus Torvalds's avatar
Linus Torvalds committed
932
933
934
935
936
937
};

#ifdef CONFIG_PROC_FS
static struct sock *raw_get_first(struct seq_file *seq)
{
	struct sock *sk;
938
	struct raw_iter_state *state = raw_seq_private(seq);
Linus Torvalds's avatar
Linus Torvalds committed
939

940
941
	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
			++state->bucket) {
942
		sk_for_each(sk, &state->h->ht[state->bucket])
943
			if (sock_net(sk) == seq_file_net(seq))
Linus Torvalds's avatar
Linus Torvalds committed
944
945
946
947
948
949
950
951
952
				goto found;
	}
	sk = NULL;
found:
	return sk;
}

static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
953
	struct raw_iter_state *state = raw_seq_private(seq);
Linus Torvalds's avatar
Linus Torvalds committed
954
955
956
957
958

	do {
		sk = sk_next(sk);
try_again:
		;
959
	} while (sk && sock_net(sk) != seq_file_net(seq));
Linus Torvalds's avatar
Linus Torvalds committed
960

961
	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
962
		sk = sk_head(&state->h->ht[state->bucket]);
Linus Torvalds's avatar
Linus Torvalds committed
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
		goto try_again;
	}
	return sk;
}

/*
 * Return the socket @pos steps after the first one reported by
 * raw_get_first(), or NULL if the iteration ends before that.
 */
static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
{
	struct sock *sk = raw_get_first(seq);

	while (sk && pos) {
		sk = raw_get_next(seq, sk);
		/* Only count a step when a socket was actually found. */
		if (sk)
			--pos;
	}

	return pos ? NULL : sk;
}

978
/*
 * seq_file start hook.  Takes the hash table read lock and returns
 * SEQ_START_TOKEN for position 0, otherwise the socket at index *pos - 1
 * (NULL if there is none).
 */
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct raw_iter_state *state = raw_seq_private(seq);

	read_lock(&state->h->lock);
	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL_GPL(raw_seq_start);
Linus Torvalds's avatar
Linus Torvalds committed
986

987
/*
 * seq_file next hook.  Steps from the start token to the first socket, or
 * from socket @v to the following one, and increments *pos.
 */
void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *sk;

	if (v == SEQ_START_TOKEN)
		sk = raw_get_first(seq);
	else
		sk = raw_get_next(seq, v);
	++*pos;
	return sk;
}
EXPORT_SYMBOL_GPL(raw_seq_next);
Linus Torvalds's avatar
Linus Torvalds committed
999

1000
void raw_seq_stop(struct seq_file *seq, void *v)