blob: f162d59d8161f23b829adf78d83b5c065f06a883 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Johann Baudy69e3c752009-05-18 22:11:22 -070042 * Johann Baudy : Added TX RING.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080053#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080061#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <linux/kmod.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090063#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020064#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065#include <net/ip.h>
66#include <net/protocol.h>
67#include <linux/skbuff.h>
68#include <net/sock.h>
69#include <linux/errno.h>
70#include <linux/timer.h>
71#include <asm/system.h>
72#include <asm/uaccess.h>
73#include <asm/ioctls.h>
74#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040075#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070076#include <asm/io.h>
77#include <linux/proc_fs.h>
78#include <linux/seq_file.h>
79#include <linux/poll.h>
80#include <linux/module.h>
81#include <linux/init.h>
Herbert Xu905db442009-01-30 14:12:06 -080082#include <linux/mutex.h>
Eric Dumazet05423b22009-10-26 18:40:35 -070083#include <linux/if_vlan.h>
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -080084#include <linux/virtio_net.h>
Richard Cochraned85b562010-04-07 22:41:28 +000085#include <linux/errqueue.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
87#ifdef CONFIG_INET
88#include <net/inet_common.h>
89#endif
90
Linus Torvalds1da177e2005-04-16 15:20:36 -070091/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 Assumptions:
93 - if device has no dev->hard_header routine, it adds and removes ll header
94 inside itself. In this case ll header is invisible outside of device,
95 but higher levels still should reserve dev->hard_header_len.
96 Some devices are enough clever to reallocate skb, when header
97 will not fit to reserved space (tunnel), another ones are silly
98 (PPP).
99 - packet socket receives packets with pulled ll header,
100 so that SOCK_RAW should push it back.
101
102On receive:
103-----------
104
105Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700106 mac_header -> ll header
107 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
109Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700110 mac_header -> ll header
111 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112
113Incoming, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700114 mac_header -> UNKNOWN position. It is very likely, that it points to ll
115 header. PPP makes it, that is wrong, because introduce
YOSHIFUJI Hideakidb0c58f2007-07-19 10:44:35 +0900116 assymetry between rx and tx paths.
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700117 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118
119Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700120 mac_header -> data. ll header is still not built!
121 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122
123Resume
124 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
125
126
127On transmit:
128------------
129
130dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700131 mac_header -> ll header
132 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133
134dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700135 mac_header -> data
136 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
138 We should set nh.raw on output to correct posistion,
139 packet classifier depends on it.
140 */
141
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142/* Private packet socket structures. */
143
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000144struct packet_mclist {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 struct packet_mclist *next;
146 int ifindex;
147 int count;
148 unsigned short type;
149 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700150 unsigned char addr[MAX_ADDR_LEN];
151};
152/* identical to struct packet_mreq except it has
153 * a longer address field.
154 */
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000155struct packet_mreq_max {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700156 int mr_ifindex;
157 unsigned short mr_type;
158 unsigned short mr_alen;
159 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160};
David S. Millera2efcfa2007-05-29 13:12:50 -0700161
Johann Baudy69e3c752009-05-18 22:11:22 -0700162static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
163 int closing, int tx_ring);
164
165struct packet_ring_buffer {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000166 char **pg_vec;
Johann Baudy69e3c752009-05-18 22:11:22 -0700167 unsigned int head;
168 unsigned int frames_per_block;
169 unsigned int frame_size;
170 unsigned int frame_max;
171
172 unsigned int pg_vec_order;
173 unsigned int pg_vec_pages;
174 unsigned int pg_vec_len;
175
176 atomic_t pending;
177};
178
179struct packet_sock;
180static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182static void packet_flush_mclist(struct sock *sk);
183
184struct packet_sock {
185 /* struct sock has to be the first member of packet_sock */
186 struct sock sk;
187 struct tpacket_stats stats;
Johann Baudy69e3c752009-05-18 22:11:22 -0700188 struct packet_ring_buffer rx_ring;
189 struct packet_ring_buffer tx_ring;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190 int copy_thresh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 spinlock_t bind_lock;
Herbert Xu905db442009-01-30 14:12:06 -0800192 struct mutex pg_vec_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800193 unsigned int running:1, /* prot_hook is attached*/
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700194 auxdata:1,
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -0800195 origdev:1,
196 has_vnet_hdr:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800198 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 struct packet_mclist *mclist;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 atomic_t mapped;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700201 enum tpacket_versions tp_version;
202 unsigned int tp_hdrlen;
Patrick McHardy8913336a2008-07-18 18:05:19 -0700203 unsigned int tp_reserve;
Johann Baudy69e3c752009-05-18 22:11:22 -0700204 unsigned int tp_loss:1;
Eric Dumazet94b059522009-10-16 04:02:20 +0000205 struct packet_type prot_hook ____cacheline_aligned_in_smp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206};
207
Herbert Xuffbc6112007-02-04 23:33:10 -0800208struct packet_skb_cb {
209 unsigned int origlen;
210 union {
211 struct sockaddr_pkt pkt;
212 struct sockaddr_ll ll;
213 } sa;
214};
215
216#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800217
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700218static void __packet_set_status(struct packet_sock *po, void *frame, int status)
219{
220 union {
221 struct tpacket_hdr *h1;
222 struct tpacket2_hdr *h2;
223 void *raw;
224 } h;
225
226 h.raw = frame;
227 switch (po->tp_version) {
228 case TPACKET_V1:
229 h.h1->tp_status = status;
Johann Baudy69e3c752009-05-18 22:11:22 -0700230 flush_dcache_page(virt_to_page(&h.h1->tp_status));
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700231 break;
232 case TPACKET_V2:
233 h.h2->tp_status = status;
Johann Baudy69e3c752009-05-18 22:11:22 -0700234 flush_dcache_page(virt_to_page(&h.h2->tp_status));
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700235 break;
Johann Baudy69e3c752009-05-18 22:11:22 -0700236 default:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000237 pr_err("TPACKET version not supported\n");
Johann Baudy69e3c752009-05-18 22:11:22 -0700238 BUG();
239 }
240
241 smp_wmb();
242}
243
244static int __packet_get_status(struct packet_sock *po, void *frame)
245{
246 union {
247 struct tpacket_hdr *h1;
248 struct tpacket2_hdr *h2;
249 void *raw;
250 } h;
251
252 smp_rmb();
253
254 h.raw = frame;
255 switch (po->tp_version) {
256 case TPACKET_V1:
257 flush_dcache_page(virt_to_page(&h.h1->tp_status));
258 return h.h1->tp_status;
259 case TPACKET_V2:
260 flush_dcache_page(virt_to_page(&h.h2->tp_status));
261 return h.h2->tp_status;
262 default:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000263 pr_err("TPACKET version not supported\n");
Johann Baudy69e3c752009-05-18 22:11:22 -0700264 BUG();
265 return 0;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700266 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267}
Johann Baudy69e3c752009-05-18 22:11:22 -0700268
269static void *packet_lookup_frame(struct packet_sock *po,
270 struct packet_ring_buffer *rb,
271 unsigned int position,
272 int status)
273{
274 unsigned int pg_vec_pos, frame_offset;
275 union {
276 struct tpacket_hdr *h1;
277 struct tpacket2_hdr *h2;
278 void *raw;
279 } h;
280
281 pg_vec_pos = position / rb->frames_per_block;
282 frame_offset = position % rb->frames_per_block;
283
284 h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
285
286 if (status != __packet_get_status(po, h.raw))
287 return NULL;
288
289 return h.raw;
290}
291
292static inline void *packet_current_frame(struct packet_sock *po,
293 struct packet_ring_buffer *rb,
294 int status)
295{
296 return packet_lookup_frame(po, rb, rb->head, status);
297}
298
299static inline void *packet_previous_frame(struct packet_sock *po,
300 struct packet_ring_buffer *rb,
301 int status)
302{
303 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
304 return packet_lookup_frame(po, rb, previous, status);
305}
306
307static inline void packet_increment_head(struct packet_ring_buffer *buff)
308{
309 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
310}
311
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312static inline struct packet_sock *pkt_sk(struct sock *sk)
313{
314 return (struct packet_sock *)sk;
315}
316
317static void packet_sock_destruct(struct sock *sk)
318{
Richard Cochraned85b562010-04-07 22:41:28 +0000319 skb_queue_purge(&sk->sk_error_queue);
320
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700321 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
322 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323
324 if (!sock_flag(sk, SOCK_DEAD)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000325 pr_err("Attempt to release alive packet socket: %p\n", sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 return;
327 }
328
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800329 sk_refcnt_debug_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330}
331
332
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800333static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800335static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000337static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
338 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339{
340 struct sock *sk;
341 struct sockaddr_pkt *spkt;
342
343 /*
344 * When we registered the protocol we saved the socket in the data
345 * field for just this event.
346 */
347
348 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900349
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 /*
351 * Yank back the headers [hope the device set this
352 * right or kerboom...]
353 *
354 * Incoming packets have ll header pulled,
355 * push it back.
356 *
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700357 * For outgoing ones skb->data == skb_mac_header(skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 * so that this procedure is noop.
359 */
360
361 if (skb->pkt_type == PACKET_LOOPBACK)
362 goto out;
363
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800364 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800365 goto out;
366
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000367 skb = skb_share_check(skb, GFP_ATOMIC);
368 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 goto oom;
370
371 /* drop any routing info */
Eric Dumazetadf30902009-06-02 05:19:30 +0000372 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
Phil Oester84531c22005-07-12 11:57:52 -0700374 /* drop conntrack reference */
375 nf_reset(skb);
376
Herbert Xuffbc6112007-02-04 23:33:10 -0800377 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700379 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380
381 /*
382 * The SOCK_PACKET socket receives _all_ frames.
383 */
384
385 spkt->spkt_family = dev->type;
386 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
387 spkt->spkt_protocol = skb->protocol;
388
389 /*
390 * Charge the memory to the socket. This is done specifically
391 * to prevent sockets using all the memory up.
392 */
393
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000394 if (sock_queue_rcv_skb(sk, skb) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return 0;
396
397out:
398 kfree_skb(skb);
399oom:
400 return 0;
401}
402
403
404/*
405 * Output a raw packet to a device layer. This bypasses all the other
406 * protocol layers and you must therefore supply it with a complete frame
407 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900408
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
410 struct msghdr *msg, size_t len)
411{
412 struct sock *sk = sock->sk;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000413 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000414 struct sk_buff *skb = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 struct net_device *dev;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000416 __be16 proto = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 int err;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900418
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900420 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 */
422
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000423 if (saddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 if (msg->msg_namelen < sizeof(struct sockaddr))
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000425 return -EINVAL;
426 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
427 proto = saddr->spkt_protocol;
428 } else
429 return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
431 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900432 * Find the device first to size check it
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 */
434
435 saddr->spkt_device[13] = 0;
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000436retry:
Eric Dumazet654d1f82009-11-02 10:43:32 +0100437 rcu_read_lock();
438 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 err = -ENODEV;
440 if (dev == NULL)
441 goto out_unlock;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900442
David S. Millerd5e76b02007-01-25 19:30:36 -0800443 err = -ENETDOWN;
444 if (!(dev->flags & IFF_UP))
445 goto out_unlock;
446
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 /*
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000448 * You may not queue a frame bigger than the mtu. This is the lowest level
449 * raw protocol and you must do your own fragmentation at this level.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900451
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 err = -EMSGSIZE;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800453 if (len > dev->mtu + dev->hard_header_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 goto out_unlock;
455
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000456 if (!skb) {
457 size_t reserved = LL_RESERVED_SPACE(dev);
458 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000460 rcu_read_unlock();
461 skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
462 if (skb == NULL)
463 return -ENOBUFS;
464 /* FIXME: Save some space for broken drivers that write a hard
465 * header at transmission time by themselves. PPP is the notable
466 * one here. This should really be fixed at the driver level.
467 */
468 skb_reserve(skb, reserved);
469 skb_reset_network_header(skb);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900470
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000471 /* Try to align data part correctly */
472 if (hhlen) {
473 skb->data -= hhlen;
474 skb->tail -= hhlen;
475 if (len < hhlen)
476 skb_reset_network_header(skb);
477 }
478 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
479 if (err)
480 goto out_free;
481 goto retry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 }
483
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000484
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 skb->protocol = proto;
486 skb->dev = dev;
487 skb->priority = sk->sk_priority;
Eric Dumazet2d37a182009-10-01 19:14:46 +0000488 skb->mark = sk->sk_mark;
Richard Cochraned85b562010-04-07 22:41:28 +0000489 err = sock_tx_timestamp(msg, sk, skb_tx(skb));
490 if (err < 0)
491 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
493 dev_queue_xmit(skb);
Eric Dumazet654d1f82009-11-02 10:43:32 +0100494 rcu_read_unlock();
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000495 return len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497out_unlock:
Eric Dumazet654d1f82009-11-02 10:43:32 +0100498 rcu_read_unlock();
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000499out_free:
500 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 return err;
502}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
David S. Millerdbcb5852007-01-24 15:21:02 -0800504static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
505 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506{
507 struct sk_filter *filter;
508
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700509 rcu_read_lock_bh();
Paul E. McKenneya898def2010-02-22 17:04:49 -0800510 filter = rcu_dereference_bh(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800511 if (filter != NULL)
512 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700513 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514
David S. Millerdbcb5852007-01-24 15:21:02 -0800515 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516}
517
518/*
519 This function makes lazy skb cloning in hope that most of packets
520 are discarded by BPF.
521
522 Note tricky part: we DO mangle shared skb! skb->data, skb->len
523 and skb->cb are mangled. It works because (and until) packets
524 falling here are owned by current CPU. Output packets are cloned
525 by dev_queue_xmit_nit(), input packets are processed by net_bh
526 sequencially, so that if we return skb to original state on exit,
527 we will not harm anyone.
528 */
529
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000530static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
531 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532{
533 struct sock *sk;
534 struct sockaddr_ll *sll;
535 struct packet_sock *po;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000536 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800538 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539
540 if (skb->pkt_type == PACKET_LOOPBACK)
541 goto drop;
542
543 sk = pt->af_packet_priv;
544 po = pkt_sk(sk);
545
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800546 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800547 goto drop;
548
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 skb->dev = dev;
550
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700551 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 /* The device has an explicit notion of ll header,
553 exported to higher levels.
554
555 Otherwise, the device hides datails of it frame
556 structure, so that corresponding packet head
557 never delivered to user.
558 */
559 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700560 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 else if (skb->pkt_type == PACKET_OUTGOING) {
562 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300563 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 }
565 }
566
567 snaplen = skb->len;
568
David S. Millerdbcb5852007-01-24 15:21:02 -0800569 res = run_filter(skb, sk, snaplen);
570 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700571 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800572 if (snaplen > res)
573 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574
575 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
576 (unsigned)sk->sk_rcvbuf)
577 goto drop_n_acct;
578
579 if (skb_shared(skb)) {
580 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
581 if (nskb == NULL)
582 goto drop_n_acct;
583
584 if (skb_head != skb->data) {
585 skb->data = skb_head;
586 skb->len = skb_len;
587 }
588 kfree_skb(skb);
589 skb = nskb;
590 }
591
Herbert Xuffbc6112007-02-04 23:33:10 -0800592 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
593 sizeof(skb->cb));
594
595 sll = &PACKET_SKB_CB(skb)->sa.ll;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 sll->sll_family = AF_PACKET;
597 sll->sll_hatype = dev->type;
598 sll->sll_protocol = skb->protocol;
599 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800600 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700601 sll->sll_ifindex = orig_dev->ifindex;
602 else
603 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700605 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
Herbert Xuffbc6112007-02-04 23:33:10 -0800607 PACKET_SKB_CB(skb)->origlen = skb->len;
Herbert Xu8dc41942007-02-04 23:31:32 -0800608
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 if (pskb_trim(skb, snaplen))
610 goto drop_n_acct;
611
612 skb_set_owner_r(skb, sk);
613 skb->dev = NULL;
Eric Dumazetadf30902009-06-02 05:19:30 +0000614 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615
Phil Oester84531c22005-07-12 11:57:52 -0700616 /* drop conntrack reference */
617 nf_reset(skb);
618
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 spin_lock(&sk->sk_receive_queue.lock);
620 po->stats.tp_packets++;
Neil Horman3b885782009-10-12 13:26:31 -0700621 skb->dropcount = atomic_read(&sk->sk_drops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 __skb_queue_tail(&sk->sk_receive_queue, skb);
623 spin_unlock(&sk->sk_receive_queue.lock);
624 sk->sk_data_ready(sk, skb->len);
625 return 0;
626
627drop_n_acct:
Neil Horman3b885782009-10-12 13:26:31 -0700628 po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
630drop_n_restore:
631 if (skb_head != skb->data && skb_shared(skb)) {
632 skb->data = skb_head;
633 skb->len = skb_len;
634 }
635drop:
Neil Hormanead2ceb2009-03-11 09:49:55 +0000636 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 return 0;
638}
639
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000640static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
641 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642{
643 struct sock *sk;
644 struct packet_sock *po;
645 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700646 union {
647 struct tpacket_hdr *h1;
648 struct tpacket2_hdr *h2;
649 void *raw;
650 } h;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000651 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800653 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700655 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700657 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700658 struct timespec ts;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659
660 if (skb->pkt_type == PACKET_LOOPBACK)
661 goto drop;
662
663 sk = pt->af_packet_priv;
664 po = pkt_sk(sk);
665
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800666 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800667 goto drop;
668
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700669 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700671 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 else if (skb->pkt_type == PACKET_OUTGOING) {
673 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300674 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 }
676 }
677
Herbert Xu8dc41942007-02-04 23:31:32 -0800678 if (skb->ip_summed == CHECKSUM_PARTIAL)
679 status |= TP_STATUS_CSUMNOTREADY;
680
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 snaplen = skb->len;
682
David S. Millerdbcb5852007-01-24 15:21:02 -0800683 res = run_filter(skb, sk, snaplen);
684 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700685 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800686 if (snaplen > res)
687 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688
689 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardy8913336a2008-07-18 18:05:19 -0700690 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
691 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300693 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700694 netoff = TPACKET_ALIGN(po->tp_hdrlen +
Patrick McHardy8913336a2008-07-18 18:05:19 -0700695 (maclen < 16 ? 16 : maclen)) +
696 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 macoff = netoff - maclen;
698 }
699
Johann Baudy69e3c752009-05-18 22:11:22 -0700700 if (macoff + snaplen > po->rx_ring.frame_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 if (po->copy_thresh &&
702 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
703 (unsigned)sk->sk_rcvbuf) {
704 if (skb_shared(skb)) {
705 copy_skb = skb_clone(skb, GFP_ATOMIC);
706 } else {
707 copy_skb = skb_get(skb);
708 skb_head = skb->data;
709 }
710 if (copy_skb)
711 skb_set_owner_r(copy_skb, sk);
712 }
Johann Baudy69e3c752009-05-18 22:11:22 -0700713 snaplen = po->rx_ring.frame_size - macoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 if ((int)snaplen < 0)
715 snaplen = 0;
716 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 spin_lock(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -0700719 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700720 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 goto ring_is_full;
Johann Baudy69e3c752009-05-18 22:11:22 -0700722 packet_increment_head(&po->rx_ring);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 po->stats.tp_packets++;
724 if (copy_skb) {
725 status |= TP_STATUS_COPY;
726 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
727 }
728 if (!po->stats.tp_drops)
729 status &= ~TP_STATUS_LOSING;
730 spin_unlock(&sk->sk_receive_queue.lock);
731
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700732 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700734 switch (po->tp_version) {
735 case TPACKET_V1:
736 h.h1->tp_len = skb->len;
737 h.h1->tp_snaplen = snaplen;
738 h.h1->tp_mac = macoff;
739 h.h1->tp_net = netoff;
740 if (skb->tstamp.tv64)
741 tv = ktime_to_timeval(skb->tstamp);
742 else
743 do_gettimeofday(&tv);
744 h.h1->tp_sec = tv.tv_sec;
745 h.h1->tp_usec = tv.tv_usec;
746 hdrlen = sizeof(*h.h1);
747 break;
748 case TPACKET_V2:
749 h.h2->tp_len = skb->len;
750 h.h2->tp_snaplen = snaplen;
751 h.h2->tp_mac = macoff;
752 h.h2->tp_net = netoff;
753 if (skb->tstamp.tv64)
754 ts = ktime_to_timespec(skb->tstamp);
755 else
756 getnstimeofday(&ts);
757 h.h2->tp_sec = ts.tv_sec;
758 h.h2->tp_nsec = ts.tv_nsec;
Eric Dumazet05423b22009-10-26 18:40:35 -0700759 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700760 hdrlen = sizeof(*h.h2);
761 break;
762 default:
763 BUG();
764 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700766 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700767 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 sll->sll_family = AF_PACKET;
769 sll->sll_hatype = dev->type;
770 sll->sll_protocol = skb->protocol;
771 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800772 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700773 sll->sll_ifindex = orig_dev->ifindex;
774 else
775 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700777 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800778 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 {
780 struct page *p_start, *p_end;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700781 u8 *h_end = h.raw + macoff + snaplen - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700783 p_start = virt_to_page(h.raw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 p_end = virt_to_page(h_end);
785 while (p_start <= p_end) {
786 flush_dcache_page(p_start);
787 p_start++;
788 }
789 }
790
791 sk->sk_data_ready(sk, 0);
792
793drop_n_restore:
794 if (skb_head != skb->data && skb_shared(skb)) {
795 skb->data = skb_head;
796 skb->len = skb_len;
797 }
798drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900799 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 return 0;
801
802ring_is_full:
803 po->stats.tp_drops++;
804 spin_unlock(&sk->sk_receive_queue.lock);
805
806 sk->sk_data_ready(sk, 0);
Wei Yongjunacb5d752009-02-25 00:36:42 +0000807 kfree_skb(copy_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 goto drop_n_restore;
809}
810
Johann Baudy69e3c752009-05-18 22:11:22 -0700811static void tpacket_destruct_skb(struct sk_buff *skb)
812{
813 struct packet_sock *po = pkt_sk(skb->sk);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000814 void *ph;
Johann Baudy69e3c752009-05-18 22:11:22 -0700815
816 BUG_ON(skb == NULL);
817
818 if (likely(po->tx_ring.pg_vec)) {
819 ph = skb_shinfo(skb)->destructor_arg;
820 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
821 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
822 atomic_dec(&po->tx_ring.pending);
823 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
824 }
825
826 sock_wfree(skb);
827}
828
/*
 * tpacket_fill_skb - build an skb that references one TX_RING frame.
 *
 * The frame's payload pages are attached to the skb as page fragments
 * (zero-copy); only the link-layer header is copied into the linear area
 * for SOCK_RAW sockets.  Returns the frame's tp_len on success, or a
 * negative errno (-EMSGSIZE, -EINVAL, -EFAULT, or skb_store_bits' error).
 */
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	/* Remember the ring slot so tpacket_destruct_skb can release it. */
	skb_shinfo(skb)->destructor_arg = ph.raw;

	/* tp_len lives at a version-dependent offset in the frame header. */
	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* User data starts right after the (aligned) frame header. */
	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		/* Kernel builds the link-layer header from 'addr'. */
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		/* Copy the user-supplied LL header into the linear area. */
		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	page = virt_to_page(data);
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	/* Account all remaining payload up front; frags are added below. */
	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	/* Attach the ring pages page-by-page as skb fragments. */
	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		flush_dcache_page(page);
		get_page(page);	/* each frag holds its own page reference */
		skb_fill_page_desc(skb,
				nr_frags,
				page++, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
927
/*
 * tpacket_snd - transmit loop for a socket with a mapped TX_RING.
 *
 * Walks the TX ring, turning every TP_STATUS_SEND_REQUEST frame into an
 * skb (via tpacket_fill_skb) and handing it to dev_queue_xmit().  Unless
 * MSG_DONTWAIT is set, keeps looping until all in-flight frames have been
 * released by tpacket_destruct_skb.  Returns the total bytes queued, or a
 * negative errno.  Serialised against ring teardown by pg_vec_lock.
 */
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		/* No explicit destination: use the bound device/protocol. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	/* Largest payload a frame can carry, capped by the device MTU. */
	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			/* Nothing queued by user space yet; yield and retry
			 * (loop condition below decides whether to keep going).
			 */
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				/* PACKET_LOSS: silently drop the bad frame
				 * and give the slot back to user space.
				 */
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001056static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1057 size_t reserve, size_t len,
1058 size_t linear, int noblock,
1059 int *err)
1060{
1061 struct sk_buff *skb;
1062
1063 /* Under a page? Don't bother with paged skb. */
1064 if (prepad + len < PAGE_SIZE || !linear)
1065 linear = len;
1066
1067 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1068 err);
1069 if (!skb)
1070 return NULL;
1071
1072 skb_reserve(skb, reserve);
1073 skb_put(skb, linear);
1074 skb->data_len = len - linear;
1075 skb->len += len - linear;
1076
1077 return skb;
1078}
1079
/*
 * packet_snd - non-ring sendmsg path: copy one packet from the user iovec
 * into a freshly allocated skb and transmit it.
 *
 * For PACKET_VNET_HDR sockets the message starts with a virtio_net_hdr
 * describing checksum offload / GSO parameters, which is parsed and
 * applied to the skb.  Returns bytes sent (including the vnet header) or
 * a negative errno.
 */
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		/* Unaddressed send: fall back to the bound device/proto. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	/* SOCK_RAW callers supply the link-layer header themselves. */
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		/* Peel the leading virtio_net_hdr off the user data. */
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		/* hdr_len must cover the checksum fields it points into. */
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	/* GSO packets may legitimately exceed the MTU; plain ones may not. */
	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		/* Report the vnet header as part of the bytes sent. */
		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
1245
Johann Baudy69e3c752009-05-18 22:11:22 -07001246static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1247 struct msghdr *msg, size_t len)
1248{
Johann Baudy69e3c752009-05-18 22:11:22 -07001249 struct sock *sk = sock->sk;
1250 struct packet_sock *po = pkt_sk(sk);
1251 if (po->tx_ring.pg_vec)
1252 return tpacket_snd(po, msg);
1253 else
Johann Baudy69e3c752009-05-18 22:11:22 -07001254 return packet_snd(sock, msg, len);
1255}
1256
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257/*
1258 * Close a PACKET socket. This is fairly simple. We immediately go
1259 * to 'closed' state and remove our protocol entry in the device list.
1260 */
1261
/*
 * packet_release - close a PACKET socket.
 *
 * Unlinks the socket from the per-net list, detaches the protocol hook,
 * frees any mapped RX/TX rings, waits for RCU readers (synchronize_net)
 * and drops the final reference.  Always returns 0.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	/* Remove from the per-namespace socket list (RCU-safe). */
	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 *	Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);	/* drop the ref held while hooked */
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	/* A zeroed tpacket_req tells packet_set_ring to tear down. */
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
1317
1318/*
1319 * Attach a packet hook.
1320 */
1321
/*
 * packet_do_bind - (re)attach the socket's packet_type hook.
 *
 * Detaches any existing hook, then binds to 'dev' (NULL = all devices)
 * with the given protocol.  Protocol 0 leaves the socket unhooked.
 * If the device exists but is down, the hook is not installed and
 * ENETDOWN is reported asynchronously via sk_err.  Always returns 0.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		/* dev_remove_pack may sleep/synchronize: drop the spinlock
		 * around it, then retake it before rebinding.
		 */
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);	/* the hook holds a socket reference */
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
1365
1366/*
1367 * Bind a packet socket to a device
1368 */
1369
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001370static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1371 int addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001373 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 char name[15];
1375 struct net_device *dev;
1376 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001377
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 /*
1379 * Check legality
1380 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001381
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001382 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001384 strlcpy(name, uaddr->sa_data, sizeof(name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001386 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 if (dev) {
1388 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1389 dev_put(dev);
1390 }
1391 return err;
1392}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1395{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001396 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1397 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 struct net_device *dev = NULL;
1399 int err;
1400
1401
1402 /*
1403 * Check legality
1404 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001405
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 if (addr_len < sizeof(struct sockaddr_ll))
1407 return -EINVAL;
1408 if (sll->sll_family != AF_PACKET)
1409 return -EINVAL;
1410
1411 if (sll->sll_ifindex) {
1412 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001413 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 if (dev == NULL)
1415 goto out;
1416 }
1417 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1418 if (dev)
1419 dev_put(dev);
1420
1421out:
1422 return err;
1423}
1424
/* Protocol descriptor used when allocating PF_PACKET socks (sk_alloc). */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
1430
1431/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001432 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 */
1434
/*
 * packet_create - socket(AF_PACKET, ...) backend.
 *
 * Requires CAP_NET_RAW.  Supports SOCK_RAW, SOCK_DGRAM and the legacy
 * SOCK_PACKET type (which gets its own ops and receive handler).  A
 * non-zero protocol immediately registers the receive hook.  Returns 0
 * or a negative errno.
 */
static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);	/* reference held by the installed hook */
		po->running = 1;
	}

	/* Publish the socket on the per-namespace list (RCU readers). */
	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}
1498
/*
 * packet_recv_error - recvmsg(MSG_ERRQUEUE) handler.
 *
 * Dequeues one skb from the socket error queue (used for TX timestamps),
 * copies it to the user, attaches the extended error as a
 * PACKET_TX_TIMESTAMP control message, and regenerates sk_err from the
 * next queued error, if any.  Returns bytes copied or -EAGAIN when the
 * queue is empty.
 */
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		/* Drop the lock before notifying to avoid recursion. */
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
1543
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544/*
1545 * Pull a packet from our receive queue and hand it to the user.
1546 * If necessary we block.
1547 */
1548
1549static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1550 struct msghdr *msg, size_t len, int flags)
1551{
1552 struct sock *sk = sock->sk;
1553 struct sk_buff *skb;
1554 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001555 struct sockaddr_ll *sll;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001556 int vnet_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557
1558 err = -EINVAL;
Richard Cochraned85b562010-04-07 22:41:28 +00001559 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 goto out;
1561
1562#if 0
1563 /* What error should we return now? EUNATTACH? */
1564 if (pkt_sk(sk)->ifindex < 0)
1565 return -ENODEV;
1566#endif
1567
Richard Cochraned85b562010-04-07 22:41:28 +00001568 if (flags & MSG_ERRQUEUE) {
1569 err = packet_recv_error(sk, msg, len);
1570 goto out;
1571 }
1572
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 * Call the generic datagram receiver. This handles all sorts
1575 * of horrible races and re-entrancy so we can forget about it
1576 * in the protocol layers.
1577 *
1578 * Now it will return ENETDOWN, if device have just gone down,
1579 * but then it will block.
1580 */
1581
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001582 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583
1584 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001585 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 * handles the blocking we don't see and worry about blocking
1587 * retries.
1588 */
1589
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001590 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 goto out;
1592
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001593 if (pkt_sk(sk)->has_vnet_hdr) {
1594 struct virtio_net_hdr vnet_hdr = { 0 };
1595
1596 err = -EINVAL;
1597 vnet_hdr_len = sizeof(vnet_hdr);
1598 if ((len -= vnet_hdr_len) < 0)
1599 goto out_free;
1600
1601 if (skb_is_gso(skb)) {
1602 struct skb_shared_info *sinfo = skb_shinfo(skb);
1603
1604 /* This is a hint as to how much should be linear. */
1605 vnet_hdr.hdr_len = skb_headlen(skb);
1606 vnet_hdr.gso_size = sinfo->gso_size;
1607 if (sinfo->gso_type & SKB_GSO_TCPV4)
1608 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1609 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1610 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1611 else if (sinfo->gso_type & SKB_GSO_UDP)
1612 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1613 else if (sinfo->gso_type & SKB_GSO_FCOE)
1614 goto out_free;
1615 else
1616 BUG();
1617 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1618 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1619 } else
1620 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1621
1622 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1623 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1624 vnet_hdr.csum_start = skb->csum_start -
1625 skb_headroom(skb);
1626 vnet_hdr.csum_offset = skb->csum_offset;
1627 } /* else everything is zero */
1628
1629 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1630 vnet_hdr_len);
1631 if (err < 0)
1632 goto out_free;
1633 }
1634
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001636 * If the address length field is there to be filled in, we fill
1637 * it in now.
1638 */
1639
Herbert Xuffbc6112007-02-04 23:33:10 -08001640 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001641 if (sock->type == SOCK_PACKET)
1642 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1643 else
1644 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1645
1646 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 * You lose any data beyond the buffer you gave. If it worries a
1648 * user program they can ask the device for its MTU anyway.
1649 */
1650
1651 copied = skb->len;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001652 if (copied > len) {
1653 copied = len;
1654 msg->msg_flags |= MSG_TRUNC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 }
1656
1657 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1658 if (err)
1659 goto out_free;
1660
Neil Horman3b885782009-10-12 13:26:31 -07001661 sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662
1663 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001664 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1665 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666
Herbert Xu8dc41942007-02-04 23:31:32 -08001667 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001668 struct tpacket_auxdata aux;
1669
1670 aux.tp_status = TP_STATUS_USER;
1671 if (skb->ip_summed == CHECKSUM_PARTIAL)
1672 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1673 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1674 aux.tp_snaplen = skb->len;
1675 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001676 aux.tp_net = skb_network_offset(skb);
Eric Dumazet05423b22009-10-26 18:40:35 -07001677 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001678
1679 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001680 }
1681
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 /*
1683 * Free or return the buffer as appropriate. Again this
1684 * hides all the races and re-entrancy issues from us.
1685 */
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001686 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687
1688out_free:
1689 skb_free_datagram(sk, skb);
1690out:
1691 return err;
1692}
1693
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1695 int *uaddr_len, int peer)
1696{
1697 struct net_device *dev;
1698 struct sock *sk = sock->sk;
1699
1700 if (peer)
1701 return -EOPNOTSUPP;
1702
1703 uaddr->sa_family = AF_PACKET;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001704 rcu_read_lock();
1705 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1706 if (dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 strlcpy(uaddr->sa_data, dev->name, 15);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001708 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 memset(uaddr->sa_data, 0, 14);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001710 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 *uaddr_len = sizeof(*uaddr);
1712
1713 return 0;
1714}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715
1716static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1717 int *uaddr_len, int peer)
1718{
1719 struct net_device *dev;
1720 struct sock *sk = sock->sk;
1721 struct packet_sock *po = pkt_sk(sk);
Cyrill Gorcunov13cfa97b2009-11-08 05:51:19 +00001722 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723
1724 if (peer)
1725 return -EOPNOTSUPP;
1726
1727 sll->sll_family = AF_PACKET;
1728 sll->sll_ifindex = po->ifindex;
1729 sll->sll_protocol = po->num;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001730 rcu_read_lock();
1731 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 if (dev) {
1733 sll->sll_hatype = dev->type;
1734 sll->sll_halen = dev->addr_len;
1735 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 } else {
1737 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1738 sll->sll_halen = 0;
1739 }
Eric Dumazet654d1f82009-11-02 10:43:32 +01001740 rcu_read_unlock();
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001741 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742
1743 return 0;
1744}
1745
Wang Chen2aeb0b82008-07-14 20:49:46 -07001746static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1747 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748{
1749 switch (i->type) {
1750 case PACKET_MR_MULTICAST:
Jiri Pirko11625632010-03-02 20:40:01 +00001751 if (i->alen != dev->addr_len)
1752 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 if (what > 0)
Jiri Pirko22bedad32010-04-01 21:22:57 +00001754 return dev_mc_add(dev, i->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 else
Jiri Pirko22bedad32010-04-01 21:22:57 +00001756 return dev_mc_del(dev, i->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 break;
1758 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001759 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 break;
1761 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001762 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763 break;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001764 case PACKET_MR_UNICAST:
Jiri Pirko11625632010-03-02 20:40:01 +00001765 if (i->alen != dev->addr_len)
1766 return -EINVAL;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001767 if (what > 0)
Jiri Pirkoa748ee22010-04-01 21:22:09 +00001768 return dev_uc_add(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001769 else
Jiri Pirkoa748ee22010-04-01 21:22:09 +00001770 return dev_uc_del(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001771 break;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001772 default:
1773 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001775 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776}
1777
1778static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1779{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001780 for ( ; i; i = i->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 if (i->ifindex == dev->ifindex)
1782 packet_dev_mc(dev, i, what);
1783 }
1784}
1785
/*
 *	PACKET_ADD_MEMBERSHIP: add a membership entry to the socket and
 *	program it into the device.  Entries are refcounted: adding an
 *	identical entry twice only bumps ->count.  Runs under RTNL so the
 *	device and po->mclist stay stable.
 */
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	/* allocate up front so the list scan need not be repeated on OOM */
	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			/* duplicate: just take another reference */
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	/* link first, then program the device; unlink again on failure */
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}
1839
/*
 *	PACKET_DROP_MEMBERSHIP: drop one reference on a matching membership
 *	entry; when the refcount hits zero, unlink it and deprogram the
 *	device (if it still exists).  Runs under RTNL.
 *	Returns 0 on success, -EADDRNOTAVAIL if no matching entry exists.
 */
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	/* walk with a pointer-to-link so unlinking needs no prev pointer */
	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				/* device may already be gone; that's fine */
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}
1866
/*
 *	Tear down every membership entry of the socket (socket release
 *	path), deprogramming each from its device when the device still
 *	exists.  Takes RTNL only when there is actually work to do.
 */
static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	/* unlocked fast path: common case has no memberships */
	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887
/*
 *	setsockopt() handler for SOL_PACKET.
 *
 *	Options fall into three groups:
 *	 - membership add/drop (variable-length packet_mreq, copied into
 *	   the larger packet_mreq_max for long hardware addresses);
 *	 - ring setup (PACKET_RX_RING/PACKET_TX_RING -> packet_set_ring);
 *	 - simple integer flags/values.
 *	Several options are rejected with -EBUSY once a ring is mapped,
 *	because they change the frame layout the ring depends on.
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		/* zero-fill: user may pass the shorter packet_mreq layout */
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		/* the claimed address length must fit in what was copied */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		/* vnet header mode and mmap rings are mutually exclusive */
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		/* version selects the ring header format; frozen once mapped */
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		/* vnet headers only make sense on SOCK_RAW sockets */
		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
2034
/*
 *	getsockopt() handler for SOL_PACKET.  Each case clamps the user
 *	supplied length, points 'data' at the value, and the common tail
 *	writes the (possibly shortened) length plus the data back out.
 *	PACKET_STATISTICS additionally resets the counters atomically
 *	under the receive-queue lock (read-and-clear semantics).
 */
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		/* snapshot and clear under the queue lock so concurrent
		 * receive-path updates are not lost or double counted */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		/* tp_packets reports everything seen, including drops */
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		/* unusual: this option reads an input value (the version
		 * being asked about) through optval.
		 * NOTE(review): if len < sizeof(int) the copy_from_user
		 * fills val only partially, leaving the high bytes
		 * uninitialized — looks like len == sizeof(int) should be
		 * required here; confirm against current upstream. */
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
2132
2133
/*
 *	netdevice notifier: react to device state changes for every packet
 *	socket in the device's netns.
 *
 *	NETDEV_UNREGISTER: drop memberships, then fall through to the DOWN
 *	handling which unhooks the protocol handler and flags ENETDOWN on
 *	the socket (and forgets the ifindex entirely on unregister).
 *	NETDEV_UP: re-attach the protocol hook if the socket was bound
 *	(po->num set) but is not currently running.
 *
 *	The socket list is walked under RCU; per-socket state transitions
 *	are serialized by po->bind_lock.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					/* drop the ref held while hooked */
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* device is gone for good */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}
2185
2186
/*
 *	ioctl() handler: queue occupancy queries (SIOCOUTQ/SIOCINQ),
 *	timestamp retrieval, and — for the init netns only — a set of
 *	legacy IPv4 interface/routing ioctls forwarded to inet_dgram_ops.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		/* bytes committed to the write path but not yet freed */
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		/* report the size of the next packet, not the whole queue */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		/* these legacy ioctls only work in the initial netns */
		if (!net_eq(sock_net(sk), &init_net))
			return -ENOIOCTLCMD;
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
2241
/*
 *	poll() handler: start from the generic datagram poll mask, then
 *	add readiness derived from the mmap rings — readable when the RX
 *	ring holds a frame not owned by the kernel, writable when the TX
 *	ring's current frame is available.  Each ring is checked under the
 *	corresponding queue lock.
 */
static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
2263
2264
2265/* Dirty? Well, I still did not learn better way to account
2266 * for user mmaps.
2267 */
2268
2269static void packet_mm_open(struct vm_area_struct *vma)
2270{
2271 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002272 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002274
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 if (sk)
2276 atomic_inc(&pkt_sk(sk)->mapped);
2277}
2278
2279static void packet_mm_close(struct vm_area_struct *vma)
2280{
2281 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002282 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002284
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 if (sk)
2286 atomic_dec(&pkt_sk(sk)->mapped);
2287}
2288
/* VMA callbacks for mmap'ed ring buffers: track the number of live
 * user mappings via pkt_sk(sk)->mapped. */
static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};
2293
/*
 * Release a page vector: free each allocated block (entries may be
 * NULL after a partially failed alloc_pg_vec), then the vector itself.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int idx;

	for (idx = 0; idx < len; idx++)
		if (likely(pg_vec[idx]))
			free_pages((unsigned long) pg_vec[idx], order);

	kfree(pg_vec);
}
2304
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002305static inline char *alloc_one_pg_vec_page(unsigned long order)
2306{
Eric Dumazet719bfea2009-04-15 03:39:52 -07002307 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2308
2309 return (char *) __get_free_pages(gfp_flags, order);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002310}
2311
2312static char **alloc_pg_vec(struct tpacket_req *req, int order)
2313{
2314 unsigned int block_nr = req->tp_block_nr;
2315 char **pg_vec;
2316 int i;
2317
2318 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2319 if (unlikely(!pg_vec))
2320 goto out;
2321
2322 for (i = 0; i < block_nr; i++) {
2323 pg_vec[i] = alloc_one_pg_vec_page(order);
2324 if (unlikely(!pg_vec[i]))
2325 goto out_free_pgvec;
2326 }
2327
2328out:
2329 return pg_vec;
2330
2331out_free_pgvec:
2332 free_pg_vec(pg_vec, order, block_nr);
2333 pg_vec = NULL;
2334 goto out;
2335}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336
/*
 *	Create (@req->tp_block_nr != 0) or tear down (== 0, @closing) the
 *	RX or TX mmap ring of a packet socket.
 *
 *	Sequence: validate the geometry and allocate the new page vector;
 *	detach the protocol hook (so no packets flow into the ring while
 *	it is swapped); swap the old and new vectors under pg_vec_lock and
 *	the queue lock; re-attach the hook; free whichever vector is now
 *	unused.  The statement order here is load-bearing — do not
 *	rearrange without understanding the locking.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	/* can't resize while userspace has it mapped or TX is in flight */
	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		/* blocks must be page-aligned; frames must hold at least
		 * the ring header plus the configured reserve, and blocks
		 * must divide evenly into the requested frame count */
		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		/* teardown request must not specify frames */
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	/* wait out any receive path still inside the old hook */
	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
/* exchange helper: returns the old value of 'a' after storing 'b' */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
		spin_lock_bh(&rb_queue->lock);
		pg_vec = XC(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		order = XC(rb->pg_vec_order, order);
		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		/* choose the receive callback based on RX ring presence */
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
#undef XC
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	/* re-attach the hook if we detached it above */
	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	/* pg_vec now holds whichever vector is no longer in use */
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
2463
/*
 *	mmap() handler: map the RX ring followed by the TX ring (whichever
 *	exist) into one contiguous user VMA.  The VMA must start at offset
 *	zero and be exactly the combined size of both rings; pages are
 *	inserted one by one with vm_insert_page().  pg_vec_lock keeps the
 *	rings from being resized while we map them.
 */
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	/* total bytes backed by the rx then tx rings */
	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size +=	rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	/* no ring configured: nothing to map */
	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page = virt_to_page(rb->pg_vec[i]);
			int pg_num;

			/* blocks are compound allocations: map each
			 * constituent page of the block in order */
			for (pg_num = 0; pg_num < rb->pg_vec_pages;
					pg_num++, page++) {
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523
/*
 * proto_ops for SOCK_PACKET sockets (the legacy interface): only
 * bind/getname/send use spkt-specific handlers; mmap and most other
 * entries are sock_no_* stubs.
 */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544
/*
 * proto_ops for regular packet sockets: full feature set including
 * set/getsockopt and mmap of the rx/tx rings via packet_mmap().
 */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
2565
/* Registered with sock_register(); routes socket(PF_PACKET, ...) to packet_create(). */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};
2571
/* Delivers netdevice events to packet_notifier(); registered in packet_init(). */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
2575
2576#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577
/*
 * seq_file iterator start: takes the RCU read lock for the whole walk of
 * the per-namespace packet socket list.  The matching unlock is in
 * packet_seq_stop().
 */
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}
2586
/* Advance to the next socket in the RCU-protected sklist. */
static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}
2592
/* Drop the RCU read lock taken in packet_seq_start(). */
static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
2598
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002599static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600{
2601 if (v == SEQ_START_TOKEN)
2602 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2603 else {
Li Zefanb7ceabd2010-02-08 23:19:29 +00002604 struct sock *s = sk_entry(v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605 const struct packet_sock *po = pkt_sk(s);
2606
2607 seq_printf(seq,
2608 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2609 s,
2610 atomic_read(&s->sk_refcnt),
2611 s->sk_type,
2612 ntohs(po->num),
2613 po->ifindex,
2614 po->running,
2615 atomic_read(&s->sk_rmem_alloc),
2616 sock_i_uid(s),
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002617 sock_i_ino(s));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 }
2619
2620 return 0;
2621}
2622
/* Iterator callbacks backing /proc/net/packet. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
2629
/* Open /proc/net/packet with per-network-namespace seq_file private data. */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}
2635
/* file_operations for /proc/net/packet; the seq_file core does the work. */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2643
2644#endif
2645
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002646static int __net_init packet_net_init(struct net *net)
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002647{
stephen hemminger808f5112010-02-22 07:57:18 +00002648 spin_lock_init(&net->packet.sklist_lock);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002649 INIT_HLIST_HEAD(&net->packet.sklist);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002650
2651 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2652 return -ENOMEM;
2653
2654 return 0;
2655}
2656
/* Per-network-namespace teardown: remove the /proc/net/packet entry. */
static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2661
/* Per-network-namespace init/exit hooks, registered in packet_init(). */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
2666
2667
/* Module unload: tear everything down in the reverse order of packet_init(). */
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
2675
2676static int __init packet_init(void)
2677{
2678 int rc = proto_register(&packet_proto, 0);
2679
2680 if (rc != 0)
2681 goto out;
2682
2683 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002684 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002686out:
2687 return rc;
2688}
2689
2690module_init(packet_init);
2691module_exit(packet_exit);
2692MODULE_LICENSE("GPL");
2693MODULE_ALIAS_NETPROTO(PF_PACKET);