/*
       * INET		An implementation of the TCP/IP protocol suite for the LINUX
       *		operating system.  INET is implemented using the  BSD Socket
       *		interface as the means of communication with the user level.
       *
       *		PACKET - implements raw packet sockets.
       *
       * Version:	$Id: af_packet.c,v 1.47 2000/12/08 17:15:54 davem Exp $
       *
       * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
       *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
       *		Alan Cox, <gw4pts@gw4pts.ampr.org>
       *
       * Fixes:	
       *		Alan Cox	:	verify_area() now used correctly
       *		Alan Cox	:	new skbuff lists, look ma no backlogs!
       *		Alan Cox	:	tidied skbuff lists.
       *		Alan Cox	:	Now uses generic datagram routines I
       *					added. Also fixed the peek/read crash
       *					from all old Linux datagram code.
       *		Alan Cox	:	Uses the improved datagram code.
       *		Alan Cox	:	Added NULL's for socket options.
       *		Alan Cox	:	Re-commented the code.
       *		Alan Cox	:	Use new kernel side addressing
       *		Rob Janssen	:	Correct MTU usage.
       *		Dave Platt	:	Counter leaks caused by incorrect
       *					interrupt locking and some slightly
       *					dubious gcc output. Can you read
       *					compiler: it said _VOLATILE_
       *	Richard Kooijman	:	Timestamp fixes.
       *		Alan Cox	:	New buffers. Use sk->mac.raw.
       *		Alan Cox	:	sendmsg/recvmsg support.
       *		Alan Cox	:	Protocol setting support
       *	Alexey Kuznetsov	:	Untied from IPv4 stack.
       *	Cyrus Durgin		:	Fixed kerneld for kmod.
       *	Michal Ostrowski        :       Module initialization cleanup.
       *
       *		This program is free software; you can redistribute it and/or
       *		modify it under the terms of the GNU General Public License
       *		as published by the Free Software Foundation; either version
       *		2 of the License, or (at your option) any later version.
       *
       */
       
      #include <linux/config.h>
      #include <linux/types.h>
      #include <linux/sched.h>
      #include <linux/mm.h>
      #include <linux/fcntl.h>
      #include <linux/socket.h>
      #include <linux/in.h>
      #include <linux/inet.h>
      #include <linux/netdevice.h>
      #include <linux/if_packet.h>
      #include <linux/wireless.h>
      #include <linux/kmod.h>
      #include <net/ip.h>
      #include <net/protocol.h>
      #include <linux/skbuff.h>
      #include <net/sock.h>
      #include <linux/errno.h>
      #include <linux/timer.h>
      #include <asm/system.h>
      #include <asm/uaccess.h>
      #include <linux/proc_fs.h>
      #include <linux/poll.h>
      #include <linux/module.h>
      #include <linux/init.h>
      #include <linux/if_bridge.h>
      
      #ifdef CONFIG_NET_DIVERT
      #include <linux/divert.h>
      #endif /* CONFIG_NET_DIVERT */
      
      #ifdef CONFIG_INET
      #include <net/inet_common.h>
      #endif
      
      #ifdef CONFIG_DLCI
      extern int dlci_ioctl(unsigned int, void*);
      #endif
      
      #define CONFIG_SOCK_PACKET	1
      
      /*
         Proposed replacement for SIOC{ADD,DEL}MULTI and
         IFF_PROMISC, IFF_ALLMULTI flags.
      
         It is more expensive, but I believe,
         it is really correct solution: reentereble, safe and fault tolerant.
      
         IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
         reference count and global flag, so that real status is
         (gflag|(count != 0)), so that we can use obsolete faulty interface
         not harming clever users.
       */
      #define CONFIG_PACKET_MULTICAST	1
      
      /*
         Assumptions:
         - if device has no dev->hard_header routine, it adds and removes ll header
           inside itself. In this case ll header is invisible outside of device,
           but higher levels still should reserve dev->hard_header_len.
           Some devices are enough clever to reallocate skb, when header
           will not fit to reserved space (tunnel), another ones are silly
           (PPP).
         - packet socket receives packets with pulled ll header,
           so that SOCK_RAW should push it back.
      
      On receive:
      -----------
      
      Incoming, dev->hard_header!=NULL
         mac.raw -> ll header
         data    -> data
      
      Outgoing, dev->hard_header!=NULL
         mac.raw -> ll header
         data    -> ll header
      
      Incoming, dev->hard_header==NULL
         mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
                    PPP makes it, that is wrong, because introduce assymetry
      	      between rx and tx paths.
         data    -> data
      
      Outgoing, dev->hard_header==NULL
         mac.raw -> data. ll header is still not built!
         data    -> data
      
      Resume
        If dev->hard_header==NULL we are unlikely to restore sensible ll header.
      
      
      On transmit:
      ------------
      
      dev->hard_header != NULL
         mac.raw -> ll header
         data    -> ll header
      
      dev->hard_header == NULL (ll header is added by device, we cannot control it)
         mac.raw -> data
         data -> data
      
         We should set nh.raw on output to correct posistion,
         packet classifier depends on it.
       */
      
      /* List of all packet sockets. */
      static struct sock * packet_sklist = NULL;
      static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
      
      atomic_t packet_socks_nr;
      
      
      /* Private packet socket structures. */
      
      #ifdef CONFIG_PACKET_MULTICAST
      struct packet_mclist
      {
      	struct packet_mclist	*next;
      	int			ifindex;
      	int			count;
      	unsigned short		type;
      	unsigned short		alen;
      	unsigned char		addr[8];
      };
      #endif
      #ifdef CONFIG_PACKET_MMAP
      static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
      #endif
      
      static void packet_flush_mclist(struct sock *sk);
      
/*
 * Per-socket private state for PF_PACKET sockets, reached through
 * sk->protinfo.af_packet.
 */
struct packet_opt
{
	struct packet_type	prot_hook;	/* our entry in the device protocol list */
	spinlock_t		bind_lock;	/* serializes hook attach/detach (rebinds) */
	char			running;	/* prot_hook is attached*/
	int			ifindex;	/* bound device		*/
	struct tpacket_stats	stats;		/* packet/drop counters */
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist	*mclist;	/* requests to revert on close */
#endif
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;		/* nonzero while the ring is mmapped */
	unsigned long		*pg_vec;	/* page blocks backing the rx ring */
	unsigned int		pg_vec_order;	/* allocation order per pg_vec entry */
	unsigned int		pg_vec_pages;	/* pages per pg_vec entry */
	unsigned int		pg_vec_len;	/* number of pg_vec entries */

	struct tpacket_hdr	**iovec;	/* per-frame header pointers into the ring */
	unsigned int		frame_size;	/* bytes per ring frame */
	unsigned int		iovmax;		/* highest valid index into iovec[] */
	unsigned int		head;		/* next frame slot to fill */
	int			copy_thresh;	/* queue full copies of oversized frames */
#endif
};
      
 201  void packet_sock_destruct(struct sock *sk)
      {
 203  	BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
 204  	BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
      
 206  	if (!sk->dead) {
      		printk("Attempt to release alive packet socket: %p\n", sk);
 208  		return;
      	}
      
 211  	if (sk->protinfo.destruct_hook)
      		kfree(sk->protinfo.destruct_hook);
      	atomic_dec(&packet_socks_nr);
      #ifdef PACKET_REFCNT_DEBUG
      	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
      #endif
 217  	MOD_DEC_USE_COUNT;
      }
      
      
      extern struct proto_ops packet_ops;
      
      #ifdef CONFIG_SOCK_PACKET
      extern struct proto_ops packet_ops_spkt;
      
 226  static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
      {
      	struct sock *sk;
      	struct sockaddr_pkt *spkt;
      
      	/*
      	 *	When we registered the protocol we saved the socket in the data
      	 *	field for just this event.
      	 */
      
      	sk = (struct sock *) pt->data;
      	
      	/*
      	 *	Yank back the headers [hope the device set this
      	 *	right or kerboom...]
      	 *
      	 *	Incoming packets have ll header pulled,
      	 *	push it back.
      	 *
      	 *	For outgoing ones skb->data == skb->mac.raw
      	 *	so that this procedure is noop.
      	 */
      
 249  	if (skb->pkt_type == PACKET_LOOPBACK)
 250  		goto out;
      
 252  	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
 253  		goto oom;
      
      	spkt = (struct sockaddr_pkt*)skb->cb;
      
      	skb_push(skb, skb->data-skb->mac.raw);
      
      	/*
      	 *	The SOCK_PACKET socket receives _all_ frames.
      	 */
      
      	spkt->spkt_family = dev->type;
      	strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
      	spkt->spkt_protocol = skb->protocol;
      
      	/*
      	 *	Charge the memory to the socket. This is done specifically
      	 *	to prevent sockets using all the memory up.
      	 */
      
 272  	if (sock_queue_rcv_skb(sk,skb) == 0)
 273  		return 0;
      
      out:
      	kfree_skb(skb);
      oom:
 278  	return 0;
      }
      
      
      /*
       *	Output a raw packet to a device layer. This bypasses all the other
       *	protocol layers and you must therefore supply it with a complete frame
       */
       
static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
			       struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto=0;
	int err;
	
	/*
	 *	Get and verify the address.
	 *	SOCK_PACKET is connectionless: every send must name the
	 *	output device, and may optionally carry a protocol number.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		/* Only the full sockaddr_pkt layout carries a protocol. */
		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
			proto=saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it 
	 */

	/* Force NUL termination of the user-supplied device name. */
	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;
	
	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */
	 
	err = -EMSGSIZE;
 	if(len>dev->mtu+dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	/* +15 leaves room for the 16-byte alignment of the payload below. */
	skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */
	 
	if (skb == NULL) 
		goto out_unlock;

	/*
	 *	Fill it in 
	 */
	 
	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb,(dev->hard_header_len+15)&~15);
	skb->nh.raw = skb->data;

	/* Try to align data part correctly */
	if (dev->hard_header) {
		/* The caller supplies the ll header inside the data: back
		 * data/tail up over the rounded reserve so the frame starts
		 * at the true header offset while the payload keeps the
		 * alignment established by skb_reserve() above. */
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->priority;
	if (err)
		goto out_free;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	/* Drop the device reference taken by dev_get_by_name(), if any. */
	if (dev)
		dev_put(dev);
	return err;
}
      #endif
      
      /*
         This function makes lazy skb cloning in hope that most of packets
         are discarded by BPF.
      
         Note tricky part: we DO mangle shared skb! skb->data, skb->len
         and skb->cb are mangled. It works because (and until) packets
         falling here are owned by current CPU. Output packets are cloned
         by dev_queue_xmit_nit(), input packets are processed by net_bh
         sequencially, so that if we return skb to original state on exit,
         we will not harm anyone.
       */
      
/*
 *	Receive handler for SOCK_RAW/SOCK_DGRAM packet sockets.  The skb
 *	may be shared (see the lazy-clone note above); we mangle it in
 *	place and restore it before returning on every drop path.
 *	Always returns 0; the skb reference we were given is consumed.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_opt *po;
	u8 * skb_head = skb->data;	/* original data pointer, restored for shared skbs */
#ifdef CONFIG_FILTER
	unsigned snaplen;
#endif

	/* Loopback copies were already delivered on the way out. */
	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides datails of it frame
		   structure, so that corresponding packet head
		   never delivered to user.
		 */
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

#ifdef CONFIG_FILTER
	snaplen = skb->len;

	/* Run the attached socket filter; its return value caps the
	 * number of bytes we keep (0 means drop). */
	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif /* CONFIG_FILTER */

	/* Receive-buffer accounting: over the limit counts as a drop. */
	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		/* We mangled a shared skb above: clone it for queueing and
		 * put the original back the way we found it. */
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb->tail - skb->data;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* Build the sockaddr_ll that recvmsg() copies out of skb->cb. */
	sll = (struct sockaddr_ll*)skb->cb;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

#ifdef CONFIG_FILTER
	/* Honour the filter's snap length. */
	if (skb->len > snaplen)
		__skb_trim(skb, snaplen);
#endif

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->receive_queue, skb);
	spin_unlock(&sk->receive_queue.lock);
	sk->data_ready(sk,skb->len);
	return 0;

drop_n_acct:
	/* Dropped for lack of buffer space; account it for getsockopt. */
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

#ifdef CONFIG_FILTER
drop_n_restore:
#endif
	/* Undo our header mangling before the skb returns to others. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb->tail - skb->data;
	}
drop:
	kfree_skb(skb);
	return 0;
}
      
      #ifdef CONFIG_PACKET_MMAP
/*
 *	Receive handler for sockets with a PACKET_RX_RING configured:
 *	copies the frame into the next free ring slot instead of
 *	queueing the skb.  Always returns 0; the skb is consumed.
 */
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
	struct sock *sk;
	struct packet_opt *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 * skb_head = skb->data;	/* original data pointer, restored for shared skbs */
	unsigned snaplen;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	if (dev->hard_header) {
		/* Same header juggling as packet_rcv(): SOCK_RAW sees the
		 * ll header, SOCK_DGRAM does not. */
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	snaplen = skb->len;

#ifdef CONFIG_FILTER
	/* Socket filter caps the snap length; 0 means drop. */
	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif

	/* Frame layout in the slot: tpacket_hdr, sockaddr_ll, then the
	 * mac/net data at macoff/netoff. */
	if (sk->type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		/* Frame does not fit in a ring slot: optionally queue a
		 * full copy on the regular receive queue (copy_thresh),
		 * then truncate what goes into the ring. */
		if (po->copy_thresh &&
		    atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->receive_queue.lock);
	h = po->iovec[po->head];

	/* Slot still owned by user space => the ring is full. */
	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->iovmax ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->receive_queue, copy_skb);
	}
	/* TP_STATUS_LOSING is reported only while drops are outstanding. */
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->receive_queue.lock);

	memcpy((u8*)h + macoff, skb->data, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	h->tp_sec = skb->stamp.tv_sec;
	h->tp_usec = skb->stamp.tv_usec;

	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	/* Hand the slot to user space; the barrier makes sure the status
	 * store is visible before we wake the reader. */
	h->tp_status = status;
	mb();

	sk->data_ready(sk, 0);

drop_n_restore:
	/* Undo header mangling on shared skbs before releasing them. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb->tail - skb->data;
	}
drop:
        kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

	/* Wake the reader anyway so it can drain the full ring. */
	sk->data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}
      
      #endif
      
      
/*
 *	Transmit path for SOCK_RAW/SOCK_DGRAM packet sockets.  With no
 *	msg_name the socket's bound device and protocol are used.
 *	Returns the byte count sent or a negative errno.
 */
static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			  struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address. 
	 */
	 
	if (saddr == NULL) {
		/* No address: fall back to what the socket is bound to. */
		ifindex	= sk->protinfo.af_packet->ifindex;
		proto	= sk->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	/* SOCK_RAW callers supply the ll header themselves, so allow for
	 * it in the size check. */
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -EMSGSIZE;
	if (len > dev->mtu+reserve)
		goto out_unlock;

	/* +15 leaves room for the 16-byte frame alignment below. */
	skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0, 
				msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out_unlock;

	skb_reserve(skb, (dev->hard_header_len+15)&~15);
	skb->nh.raw = skb->data;

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			/* SOCK_RAW: throw the built header away again; the
			 * user data copied below contains the real one. */
			skb->tail = skb->data;
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->priority;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	/* Positive returns are congestion codes; map them to an errno.
	 * The skb was consumed by dev_queue_xmit() either way. */
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	/* Release the device reference from dev_get_by_index(), if any. */
	if (dev)
		dev_put(dev);
out:
	return err;
}
      
      /*
       *	Close a PACKET socket. This is fairly simple. We immediately go
       *	to 'closed' state and remove our protocol entry in the device list.
       */
      
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct sock **skp;

	if (!sk)
		return 0;

	/* Unlink from the global packet socket list. */
	write_lock_bh(&packet_sklist_lock);
	for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
		if (*skp == sk) {
			*skp = sk->next;
			__sock_put(sk);		/* drop the list's reference */
			break;
		}
	}
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (sk->protinfo.af_packet->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
		sk->protinfo.af_packet->running = 0;
		__sock_put(sk);		/* drop the hook's reference */
	}

#ifdef CONFIG_PACKET_MULTICAST
	/* Revert any multicast/promiscuity requests we applied. */
	packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
	/* A zeroed tpacket_req tells packet_set_ring() to tear down. */
	if (sk->protinfo.af_packet->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->receive_queue);

	sock_put(sk);	/* may free the sock via packet_sock_destruct() */
	return 0;
}
      
      /*
       *	Attach a packet hook.
       */
      
static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
	/*
	 *	Attach the receive hook for (protocol, dev), detaching any
	 *	existing hook first.  protocol == 0 leaves the socket
	 *	unhooked; dev == NULL means listen on all devices.
	 */

	lock_sock(sk);

	/* bind_lock serializes against concurrent rebinds on the socket. */
	spin_lock(&sk->protinfo.af_packet->bind_lock);
	if (sk->protinfo.af_packet->running) {
		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
		__sock_put(sk);		/* the hook no longer holds us */
		sk->protinfo.af_packet->running = 0;
	}

	sk->num = protocol;
	sk->protinfo.af_packet->prot_hook.type = protocol;
	sk->protinfo.af_packet->prot_hook.dev = dev;

	sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&sk->protinfo.af_packet->prot_hook);
			sock_hold(sk);	/* the hook holds a reference */
			sk->protinfo.af_packet->running = 1;
		} else {
			/* Device is down: report asynchronously, stay unhooked. */
			sk->err = ENETDOWN;
			if (!sk->dead)
				sk->error_report(sk);
		}
	} else {
		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
		sock_hold(sk);	/* the hook holds a reference */
		sk->protinfo.af_packet->running = 1;
	}

out_unlock:
	spin_unlock(&sk->protinfo.af_packet->bind_lock);
	release_sock(sk);
	return 0;
}
      
      /*
       *	Bind a packet socket to a device
       */
      
      #ifdef CONFIG_SOCK_PACKET
      
 856  static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
      {
      	struct sock *sk=sock->sk;
      	char name[15];
      	struct net_device *dev;
      	int err = -ENODEV;
      	
      	/*
      	 *	Check legality
      	 */
      	 
 867  	if(addr_len!=sizeof(struct sockaddr))
 868  		return -EINVAL;
      	strncpy(name,uaddr->sa_data,14);
      	name[14]=0;
      
      	dev = dev_get_by_name(name);
 873  	if (dev) {
      		err = packet_do_bind(sk, dev, sk->num);
      		dev_put(dev);
      	}
 877  	return err;
      }
      #endif
      
 881  static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
      {
      	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
      	struct sock *sk=sock->sk;
      	struct net_device *dev = NULL;
      	int err;
      
      
      	/*
      	 *	Check legality
      	 */
      	 
 893  	if (addr_len < sizeof(struct sockaddr_ll))
 894  		return -EINVAL;
 895  	if (sll->sll_family != AF_PACKET)
 896  		return -EINVAL;
      
 898  	if (sll->sll_ifindex) {
      		err = -ENODEV;
      		dev = dev_get_by_index(sll->sll_ifindex);
 901  		if (dev == NULL)
 902  			goto out;
      	}
      	err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
 905  	if (dev)
      		dev_put(dev);
      
      out:
 909  	return err;
      }
      
      
      /*
       *	Create a packet of type SOCK_PACKET. 
       */
      
static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	int err;

	/* Raw network access is privileged. */
	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;
	MOD_INC_USE_COUNT;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	/* The obsolete SOCK_PACKET type uses its own ops vector. */
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock,sk);

	sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
	if (sk->protinfo.af_packet == NULL)
		goto out_free;
	memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
	sk->family = PF_PACKET;
	sk->num = protocol;

	sk->destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&sk->protinfo.af_packet->bind_lock);
	sk->protinfo.af_packet->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
#endif
	/* The hook's data field carries the socket back to the rcv func. */
	sk->protinfo.af_packet->prot_hook.data = (void *)sk;

	if (protocol) {
		sk->protinfo.af_packet->prot_hook.type = protocol;
		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
		sock_hold(sk);	/* the hook holds a reference */
		sk->protinfo.af_packet->running = 1;
	}

	/* Insert into the global list; the list holds a reference too. */
	write_lock_bh(&packet_sklist_lock);
	sk->next = packet_sklist;
	packet_sklist = sk;
	sock_hold(sk);
	write_unlock_bh(&packet_sklist_lock);
	return(0);

out_free:
	sk_free(sk);
out:
	MOD_DEC_USE_COUNT;
	return err;
}
      
      /*
       *	Pull a packet from our receive queue and hand it to the user.
       *	If necessary we block.
       */
      
static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
			  int flags, struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (sk->protinfo.af_packet->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sizeof(struct sockaddr_ll);

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */

	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram() 
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if(skb==NULL)
		goto out;

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len)
	{
		copied=len;
		msg->msg_flags|=MSG_TRUNC;
	}

	/* We can't use skb_copy_datagram here */
	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	/* The receive path stashed the link-level address in skb->cb. */
	if (msg->msg_name)
		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 *
	 *	With MSG_TRUNC the caller learns the full packet length.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
      
      #ifdef CONFIG_SOCK_PACKET
1076  static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
      			       int *uaddr_len, int peer)
      {
      	struct net_device *dev;
      	struct sock *sk	= sock->sk;
      
1082  	if (peer)
1083  		return -EOPNOTSUPP;
      
      	uaddr->sa_family = AF_PACKET;
      	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1087  	if (dev) {
      		strncpy(uaddr->sa_data, dev->name, 15);
      		dev_put(dev);
1090  	} else
      		memset(uaddr->sa_data, 0, 14);
      	*uaddr_len = sizeof(*uaddr);
      
1094  	return 0;
      }
      #endif
      
1098  static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
      			  int *uaddr_len, int peer)
      {
      	struct net_device *dev;
      	struct sock *sk = sock->sk;
      	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
      
1105  	if (peer)
1106  		return -EOPNOTSUPP;
      
      	sll->sll_family = AF_PACKET;
      	sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
      	sll->sll_protocol = sk->num;
      	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1112  	if (dev) {
      		sll->sll_hatype = dev->type;
      		sll->sll_halen = dev->addr_len;
      		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
      		dev_put(dev);
1117  	} else {
      		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
      		sll->sll_halen = 0;
      	}
      	*uaddr_len = sizeof(*sll);
      
1123  	return 0;
      }
      
      #ifdef CONFIG_PACKET_MULTICAST
1127  static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
      {
1129  	switch (i->type) {
1130  	case PACKET_MR_MULTICAST:
1131  		if (what > 0)
      			dev_mc_add(dev, i->addr, i->alen, 0);
1133  		else
      			dev_mc_delete(dev, i->addr, i->alen, 0);
1135  		break;
1136  	case PACKET_MR_PROMISC:
      		dev_set_promiscuity(dev, what);
1138  		break;
1139  	case PACKET_MR_ALLMULTI:
      		dev_set_allmulti(dev, what);
1141  		break;
1142  	default:;
      	}
      }
      
1146  static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
      {
1148  	for ( ; i; i=i->next) {
1149  		if (i->ifindex == dev->ifindex)
      			packet_dev_mc(dev, i, what);
      	}
      }
      
1154  static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
      {
      	struct packet_mclist *ml, *i;
      	struct net_device *dev;
      	int err;
      
      	rtnl_lock();
      
      	err = -ENODEV;
      	dev = __dev_get_by_index(mreq->mr_ifindex);
1164  	if (!dev)
1165  		goto done;
      
      	err = -EINVAL;
1168  	if (mreq->mr_alen > dev->addr_len)
1169  		goto done;
      
      	err = -ENOBUFS;
      	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
1173  	if (i == NULL)
1174  		goto done;
      
      	err = 0;
1177  	for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
      		if (ml->ifindex == mreq->mr_ifindex &&
      		    ml->type == mreq->mr_type &&
      		    ml->alen == mreq->mr_alen &&
1181  		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
      			ml->count++;
      			/* Free the new element ... */
      			kfree(i);
1185  			goto done;
      		}
      	}
      
      	i->type = mreq->mr_type;
      	i->ifindex = mreq->mr_ifindex;
      	i->alen = mreq->mr_alen;
      	memcpy(i->addr, mreq->mr_address, i->alen);
      	i->count = 1;
      	i->next = sk->protinfo.af_packet->mclist;
      	sk->protinfo.af_packet->mclist = i;
      	packet_dev_mc(dev, i, +1);
      
      done:
      	rtnl_unlock();
1200  	return err;
      }
      
1203  static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
      {
      	struct packet_mclist *ml, **mlp;
      
      	rtnl_lock();
      
1209  	for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
      		if (ml->ifindex == mreq->mr_ifindex &&
      		    ml->type == mreq->mr_type &&
      		    ml->alen == mreq->mr_alen &&
1213  		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1214  			if (--ml->count == 0) {
      				struct net_device *dev;
      				*mlp = ml->next;
      				dev = dev_get_by_index(ml->ifindex);
1218  				if (dev) {
      					packet_dev_mc(dev, ml, -1);
      					dev_put(dev);
      				}
      				kfree(ml);
      			}
      			rtnl_unlock();
1225  			return 0;
      		}
      	}
      	rtnl_unlock();
1229  	return -EADDRNOTAVAIL;
      }
      
1232  static void packet_flush_mclist(struct sock *sk)
      {
      	struct packet_mclist *ml;
      
1236  	if (sk->protinfo.af_packet->mclist == NULL)
1237  		return;
      
      	rtnl_lock();
1240  	while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
      		struct net_device *dev;
      		sk->protinfo.af_packet->mclist = ml->next;
1243  		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
      			packet_dev_mc(dev, ml, -1);
      			dev_put(dev);
      		}
      		kfree(ml);
      	}
      	rtnl_unlock();
      }
      #endif
      
      static int
1254  packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
      {
      	struct sock *sk = sock->sk;
      	int ret;
      
1259  	if (level != SOL_PACKET)
1260  		return -ENOPROTOOPT;
      
1262  	switch(optname)	{
      #ifdef CONFIG_PACKET_MULTICAST
1264  	case PACKET_ADD_MEMBERSHIP:	
1265  	case PACKET_DROP_MEMBERSHIP:
      	{
      		struct packet_mreq mreq;
1268  		if (optlen<sizeof(mreq))
1269  			return -EINVAL;
1270  		if (copy_from_user(&mreq,optval,sizeof(mreq)))
1271  			return -EFAULT;
1272  		if (optname == PACKET_ADD_MEMBERSHIP)
      			ret = packet_mc_add(sk, &mreq);
1274  		else
      			ret = packet_mc_drop(sk, &mreq);
1276  		return ret;
      	}
      #endif
      #ifdef CONFIG_PACKET_MMAP
      	case PACKET_RX_RING:
      	{
      		struct tpacket_req req;
      
      		if (optlen<sizeof(req))
      			return -EINVAL;
      		if (copy_from_user(&req,optval,sizeof(req)))
      			return -EFAULT;
      		return packet_set_ring(sk, &req, 0);
      	}
      	case PACKET_COPY_THRESH:
      	{
      		int val;
      
      		if (optlen!=sizeof(val))
      			return -EINVAL;
      		if (copy_from_user(&val,optval,sizeof(val)))
      			return -EFAULT;
      
      		sk->protinfo.af_packet->copy_thresh = val;
      		return 0;
      	}
      #endif
1303  	default:
1304  		return -ENOPROTOOPT;
      	}
      }
      
1308  int packet_getsockopt(struct socket *sock, int level, int optname,
      		      char *optval, int *optlen)
      {
      	int len;
      	struct sock *sk = sock->sk;
      
1314  	if (level != SOL_PACKET)
1315  		return -ENOPROTOOPT;
      
1317    	if (get_user(len,optlen))
1318    		return -EFAULT;
      
1320  	switch(optname)	{
1321  	case PACKET_STATISTICS:
      	{
      		struct tpacket_stats st;
      
1325  		if (len > sizeof(struct tpacket_stats))
      			len = sizeof(struct tpacket_stats);
1327  		spin_lock_bh(&sk->receive_queue.lock);
      		st = sk->protinfo.af_packet->stats;
      		memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
1330  		spin_unlock_bh(&sk->receive_queue.lock);
      		st.tp_packets += st.tp_drops;
      
1333  		if (copy_to_user(optval, &st, len))
1334  			return -EFAULT;
1335  		break;
      	}
1337  	default:
1338  		return -ENOPROTOOPT;
      	}
      
1341    	if (put_user(len, optlen))
1342    		return -EFAULT;
1343    	return 0;
      }
      
      
/*
 *	Netdevice notifier: keep every packet socket's device binding and
 *	multicast state consistent when devices go down, come up, or are
 *	unregistered.  Walks the global socket list under the list's
 *	read lock; per-socket binding state is protected by bind_lock.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct packet_opt *po;
	struct net_device *dev = (struct net_device*)data;

	read_lock(&packet_sklist_lock);
	for (sk = packet_sklist; sk; sk = sk->next) {
		po = sk->protinfo.af_packet;

		switch (msg) {
		case NETDEV_DOWN:
		case NETDEV_UNREGISTER:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					/* Stop delivery and drop the socket
					 * reference the prot_hook held. */
					dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->err = ENETDOWN;
					if (!sk->dead)
						sk->error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* Device is going away for good:
					 * forget the binding entirely. */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
#ifdef CONFIG_PACKET_MULTICAST
			/* Revert memberships programmed on this device. */
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
#endif
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			/* Re-attach sockets bound to this device; sk->num is
			 * the bound protocol (presumably nonzero once bound
			 * — same test the bind path uses). */
			if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
#ifdef CONFIG_PACKET_MULTICAST
			/* Re-program memberships on the revived device. */
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, +1);
#endif
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}
      
      
1401  static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
      {
      	struct sock *sk = sock->sk;
      	int err;
      	int pid;
      
1407  	switch(cmd) 
      	{
1409  		case SIOCOUTQ:
      		{
      			int amount = atomic_read(&sk->wmem_alloc);
1412  			return put_user(amount, (int *)arg);
      		}
1414  		case SIOCINQ:
      		{
      			struct sk_buff *skb;
      			int amount = 0;
      
1419  			spin_lock_bh(&sk->receive_queue.lock);
      			skb = skb_peek(&sk->receive_queue);
1421  			if (skb)
      				amount = skb->len;
1423  			spin_unlock_bh(&sk->receive_queue.lock);
1424  			return put_user(amount, (int *)arg);
      		}
1426  		case FIOSETOWN:
1427  		case SIOCSPGRP:
      			err = get_user(pid, (int *) arg);
1429  			if (err)
1430  				return err; 
      			if (current->pid != pid && current->pgrp != -pid && 
1432  			    !capable(CAP_NET_ADMIN))
1433  				return -EPERM;
      			sk->proc = pid;
1435  			return(0);
1436  		case FIOGETOWN:
1437  		case SIOCGPGRP:
1438  			return put_user(sk->proc, (int *)arg);
1439  		case SIOCGSTAMP:
1440  			if(sk->stamp.tv_sec==0)
1441  				return -ENOENT;
      			err = -EFAULT;
1443  			if (!copy_to_user((void *)arg, &sk->stamp, sizeof(struct timeval)))
      				err = 0;
1445  			return err;
1446  		case SIOCGIFFLAGS:
      #ifndef CONFIG_INET
      		case SIOCSIFFLAGS:
      #endif
1450  		case SIOCGIFCONF:
1451  		case SIOCGIFMETRIC:
1452  		case SIOCSIFMETRIC:
1453  		case SIOCGIFMEM:
1454  		case SIOCSIFMEM:
1455  		case SIOCGIFMTU:
1456  		case SIOCSIFMTU:
1457  		case SIOCSIFLINK:
1458  		case SIOCGIFHWADDR:
1459  		case SIOCSIFHWADDR:
1460  		case SIOCSIFMAP:
1461  		case SIOCGIFMAP:
1462  		case SIOCSIFSLAVE:
1463  		case SIOCGIFSLAVE:
1464  		case SIOCGIFINDEX:
1465  		case SIOCGIFNAME:
1466  		case SIOCGIFCOUNT:
1467  		case SIOCSIFHWBROADCAST:
1468  			return(dev_ioctl(cmd,(void *) arg));
      
1470  		case SIOCGIFBR:
1471  		case SIOCSIFBR:
      #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
      #ifdef CONFIG_INET
      #ifdef CONFIG_KMOD
      			if (br_ioctl_hook == NULL)
      				request_module("bridge");
      #endif
      			if (br_ioctl_hook != NULL)
      				return br_ioctl_hook(arg);
      #endif
      #endif				
      
1483  		case SIOCGIFDIVERT:
1484  		case SIOCSIFDIVERT:
      #ifdef CONFIG_NET_DIVERT
      			return(divert_ioctl(cmd, (struct divert_cf *) arg));
      #else
1488  			return -ENOPKG;
      #endif /* CONFIG_NET_DIVERT */
      
1491  			return -ENOPKG;
      			
      #ifdef CONFIG_INET
1494  		case SIOCADDRT:
1495  		case SIOCDELRT:
1496  		case SIOCDARP:
1497  		case SIOCGARP:
1498  		case SIOCSARP:
1499  		case SIOCGIFADDR:
1500  		case SIOCSIFADDR:
1501  		case SIOCGIFBRDADDR:
1502  		case SIOCSIFBRDADDR:
1503  		case SIOCGIFNETMASK:
1504  		case SIOCSIFNETMASK:
1505  		case SIOCGIFDSTADDR:
1506  		case SIOCSIFDSTADDR:
1507  		case SIOCSIFFLAGS:
1508  		case SIOCADDDLCI:
1509  		case SIOCDELDLCI:
1510  			return inet_dgram_ops.ioctl(sock, cmd, arg);
      #endif
      
1513  		default:
      			if ((cmd >= SIOCDEVPRIVATE) &&
1515  			    (cmd <= (SIOCDEVPRIVATE + 15)))
1516  				return(dev_ioctl(cmd,(void *) arg));
      
      #ifdef CONFIG_NET_RADIO
      			if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
      				return(dev_ioctl(cmd,(void *) arg));
      #endif
1522  			return -EOPNOTSUPP;
      	}
      	/*NOTREACHED*/
1525  	return(0);
      }
      
      #ifndef CONFIG_PACKET_MMAP
      #define packet_mmap sock_no_mmap
      #define packet_poll datagram_poll
      #else
      
      unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
      {
      	struct sock *sk = sock->sk;
      	struct packet_opt *po = sk->protinfo.af_packet;
      	unsigned int mask = datagram_poll(file, sock, wait);
      
      	spin_lock_bh(&sk->receive_queue.lock);
      	if (po->iovec) {
      		unsigned last = po->head ? po->head-1 : po->iovmax;
      
      		if (po->iovec[last]->tp_status)
      			mask |= POLLIN | POLLRDNORM;
      	}
      	spin_unlock_bh(&sk->receive_queue.lock);
      	return mask;
      }
      
      
      /* Dirty? Well, I still did not learn better way to account
       * for user mmaps.
       */
      
      static void packet_mm_open(struct vm_area_struct *vma)
      {
      	struct file *file = vma->vm_file;
      	struct inode *inode = file->f_dentry->d_inode;
      	struct socket * sock = &inode->u.socket_i;
      	struct sock *sk = sock->sk;
      	
      	if (sk)
      		atomic_inc(&sk->protinfo.af_packet->mapped);
      }
      
      static void packet_mm_close(struct vm_area_struct *vma)
      {
      	struct file *file = vma->vm_file;
      	struct inode *inode = file->f_dentry->d_inode;
      	struct socket * sock = &inode->u.socket_i;
      	struct sock *sk = sock->sk;
      	
      	if (sk)
      		atomic_dec(&sk->protinfo.af_packet->mapped);
      }
      
/* Keeps the "mapped" refcount in step with VMA duplication/teardown;
 * packet_set_ring() refuses to swap the ring while mapped != 0. */
static struct vm_operations_struct packet_mmap_ops = {
	open:	packet_mm_open,
	close:	packet_mm_close,
};
      
      static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
      {
      	int i;
      
      	for (i=0; i<len; i++) {
      		if (pg_vec[i]) {
      			struct page *page, *pend;
      
      			pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
      			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
      				ClearPageReserved(page);
      			free_pages(pg_vec[i], order);
      		}
      	}
      	kfree(pg_vec);
      }
      
      
/*
 *	Install the mmap'ed receive ring described by @req, or tear the
 *	current one down when req->tp_block_nr == 0.  Called from
 *	setsockopt(PACKET_RX_RING) and, with @closing set, from socket
 *	release.  Returns 0 or a negative errno.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	unsigned long *pg_vec = NULL;
	struct tpacket_hdr **io_vec = NULL;
	struct packet_opt *po = sk->protinfo.af_packet;
	int order = 0;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;
		int frames_per_block;

		/* Sanity tests and some calculations */
		if ((int)req->tp_block_size <= 0)
			return -EINVAL;
		/* Blocks must be whole pages... */
		if (req->tp_block_size&(PAGE_SIZE-1))
			return -EINVAL;
		/* ...frames must hold at least their own header... */
		if (req->tp_frame_size < TPACKET_HDRLEN)
			return -EINVAL;
		/* ...and be properly aligned. */
		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
			return -EINVAL;
		frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (frames_per_block <= 0)
			return -EINVAL;
		/* The frame count must exactly tile the blocks. */
		if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
			return -EINVAL;
		/* OK! */

		/* Allocate page vector: each block is one high-order
		 * allocation of 2^order pages covering tp_block_size. */
		while ((PAGE_SIZE<<order) < req->tp_block_size)
			order++;

		err = -ENOMEM;

		pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
		if (pg_vec == NULL)
			goto out;
		memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));

		for (i=0; i<req->tp_block_nr; i++) {
			struct page *page, *pend;
			pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
			if (!pg_vec[i])
				goto out_free_pgvec;

			/* Reserve the pages while they can be mapped into
			 * user space; free_pg_vec() clears this again. */
			pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
				SetPageReserved(page);
		}
		/* Page vector is allocated */

		/* Draw frames: build a flat table of pointers to every
		 * frame header inside the blocks, all owned by the
		 * kernel initially. */
		io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
		if (io_vec == NULL)
			goto out_free_pgvec;
		memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));

		l = 0;
		for (i=0; i<req->tp_block_nr; i++) {
			unsigned long ptr = pg_vec[i];
			int k;

			for (k=0; k<frames_per_block; k++, l++) {
				io_vec[l] = (struct tpacket_hdr*)ptr;
				io_vec[l]->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		/* Teardown request: a frame count without blocks is bogus. */
		if (req->tp_frame_nr)
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network so no packets are delivered while
	 * the ring is exchanged. */
	spin_lock(&po->bind_lock);
	if (po->running)
		dev_remove_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	/* Refuse to swap the ring while user space has it mapped,
	 * unless we are closing the socket anyway. */
	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		/* XC(a,b): store b into a, evaluating to a's old value. */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		/* Swap the new ring in under the receive-queue lock,
		 * which the receive path holds while touching iovec.
		 * Afterwards the locals hold the OLD ring for freeing. */
		spin_lock_bh(&sk->receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		io_vec = XC(po->iovec, io_vec);
		po->iovmax = req->tp_frame_nr-1;
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->receive_queue.lock);

		/* req returns the previous block count to the caller. */
		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		/* Ring present => ring receive handler; else plain. */
		po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	/* Re-attach the socket to the device. */
	spin_lock(&po->bind_lock);
	if (po->running)
		dev_add_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	/* Free the old (or, on failure, the never-installed) ring. */
	if (io_vec)
		kfree(io_vec);

out_free_pgvec:
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
      
      static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
      {
      	struct sock *sk = sock->sk;
      	struct packet_opt *po = sk->protinfo.af_packet;
      	unsigned long size;
      	unsigned long start;
      	int err = -EINVAL;
      	int i;
      
      	if (vma->vm_pgoff)
      		return -EINVAL;
      
      	size = vma->vm_end - vma->vm_start;
      
      	lock_sock(sk);
      	if (po->pg_vec == NULL)
      		goto out;
      	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
      		goto out;
      
      	atomic_inc(&po->mapped);
      	start = vma->vm_start;
      	err = -EAGAIN;
      	for (i=0; i<po->pg_vec_len; i++) {
      		if (remap_page_range(start, __pa(po->pg_vec[i]),
      				     po->pg_vec_pages*PAGE_SIZE,
      				     vma->vm_page_prot))
      			goto out;
      		start += po->pg_vec_pages*PAGE_SIZE;
      	}
      	vma->vm_ops = &packet_mmap_ops;
      	err = 0;
      
      out:
      	release_sock(sk);
      	return err;
      }
      #endif
      
      
      #ifdef CONFIG_SOCK_PACKET
/* Socket operations for SOCK_PACKET sockets: the spkt-specific bind,
 * getname and sendmsg handlers, generic datagram poll, and no
 * sockopt/mmap support. */
struct proto_ops packet_ops_spkt = {
	family:		PF_PACKET,

	release:	packet_release,
	bind:		packet_bind_spkt,
	connect:	sock_no_connect,
	socketpair:	sock_no_socketpair,
	accept:		sock_no_accept,
	getname:	packet_getname_spkt,
	poll:		datagram_poll,
	ioctl:		packet_ioctl,
	listen:		sock_no_listen,
	shutdown:	sock_no_shutdown,
	setsockopt:	sock_no_setsockopt,
	getsockopt:	sock_no_getsockopt,
	sendmsg:	packet_sendmsg_spkt,
	recvmsg:	packet_recvmsg,
	mmap:		sock_no_mmap,
};
      #endif
      
/* Main PF_PACKET socket operations.  packet_poll/packet_mmap resolve
 * to the ring-aware variants when CONFIG_PACKET_MMAP is set, otherwise
 * to datagram_poll/sock_no_mmap via the #defines above. */
struct proto_ops packet_ops = {
	family:		PF_PACKET,

	release:	packet_release,
	bind:		packet_bind,
	connect:	sock_no_connect,
	socketpair:	sock_no_socketpair,
	accept:		sock_no_accept,
	getname:	packet_getname, 
	poll:		packet_poll,
	ioctl:		packet_ioctl,
	listen:		sock_no_listen,
	shutdown:	sock_no_shutdown,
	setsockopt:	packet_setsockopt,
	getsockopt:	packet_getsockopt,
	sendmsg:	packet_sendmsg,
	recvmsg:	packet_recvmsg,
	mmap:		packet_mmap,
};
      
      static struct net_proto_family packet_family_ops = {
      	PF_PACKET,
      	packet_create
      };
      
      struct notifier_block packet_netdev_notifier={
      	packet_notifier,
      	NULL,
      	0
      };
      
      #ifdef CONFIG_PROC_FS
/*
 *	/proc/net/packet read handler: one line per packet socket.
 *	Implements the classic read_proc windowing protocol: accumulate
 *	output, track the absolute position with pos/begin, and report
 *	via *start/*eof which slice of the buffer answers this read.
 */
static int packet_read_proc(char *buffer, char **start, off_t offset,
			     int length, int *eof, void *data)
{
	off_t pos=0;
	off_t begin=0;
	int len=0;
	struct sock *s;
	
	len+= sprintf(buffer,"sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");

	read_lock(&packet_sklist_lock);

	for (s = packet_sklist; s; s = s->next) {
		len+=sprintf(buffer+len,"%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu",
			     s,
			     atomic_read(&s->refcnt),
			     s->type,
			     ntohs(s->num),
			     s->protinfo.af_packet->ifindex,
			     s->protinfo.af_packet->running,
			     atomic_read(&s->rmem_alloc),
			     sock_i_uid(s),
			     sock_i_ino(s)
			     );

		buffer[len++]='\n';
		
		pos=begin+len;
		if(pos<offset) {
			/* Everything so far lies before the requested
			 * offset: discard it and slide the window. */
			len=0;
			begin=pos;
		}
		/* Enough data to satisfy this read: stop early. */
		if(pos>offset+length)
			goto done;
	}
	/* Walked the whole list: nothing follows this window. */
	*eof = 1;

done:
	read_unlock(&packet_sklist_lock);
	/* Trim the result to the [offset, offset+length) slice. */
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if(len<0)
		len=0;
	return len;
}
      #endif
      
      
      
1868  static void __exit packet_exit(void)
      {
      #ifdef CONFIG_PROC_FS
      	remove_proc_entry("net/packet", 0);
      #endif
      	unregister_netdevice_notifier(&packet_netdev_notifier);
      	sock_unregister(PF_PACKET);
1875  	return;
      }
      
      
/* Module init: register the PF_PACKET family, hook netdevice events,
 * and publish the /proc/net/packet listing when procfs is available. */
static int __init packet_init(void)
{
	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
#endif
	return 0;
}
      
      
      module_init(packet_init);
      module_exit(packet_exit);