/*
       *	SUCS NET3:
       *
       *	Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top
       *	of these would make sense. Not tonight however 8-).
       *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and NetROM layer all have identical poll code and mostly
       *	identical recvmsg() code. So we share it here. The poll was shared before but buried in udp.c so I moved it.
       *
       *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old udp.c code)
       *
       *	Fixes:
       *		Alan Cox	:	NULL return from skb_peek_copy() understood
       *		Alan Cox	:	Rewrote skb_read_datagram to avoid the skb_peek_copy stuff.
       *		Alan Cox	:	Added support for SOCK_SEQPACKET. IPX can no longer use the SO_TYPE hack but
       *					AX.25 now works right, and SPX is feasible.
       *		Alan Cox	:	Fixed write poll of non IP protocol crash.
       *		Florian  La Roche:	Changed for my new skbuff handling.
       *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
       *		Linus Torvalds	:	BSD semantic fixes.
       *		Alan Cox	:	Datagram iovec handling
       *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
       *		Alan Cox	:	POSIXisms
       *		Pete Wyckoff    :       Unconnected accept() fix.
       *
       */
      
      #include <linux/types.h>
      #include <linux/kernel.h>
      #include <asm/uaccess.h>
      #include <asm/system.h>
      #include <linux/mm.h>
      #include <linux/interrupt.h>
      #include <linux/in.h>
      #include <linux/errno.h>
      #include <linux/sched.h>
      #include <linux/inet.h>
      #include <linux/netdevice.h>
      #include <linux/poll.h>
      
      #include <net/ip.h>
      #include <net/protocol.h>
      #include <net/route.h>
      #include <net/tcp.h>
      #include <net/udp.h>
      #include <linux/skbuff.h>
      #include <net/sock.h>
      
      
      /*
       *	Is a socket 'connection oriented' ?
       */
       
  53  static inline int connection_based(struct sock *sk)
      {
  55  	return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM);
      }
      
      
      /*
       * Wait for a packet..
       */
      
  63  static int wait_for_packet(struct sock * sk, int *err, long *timeo_p)
      {
      	int error;
      
      	DECLARE_WAITQUEUE(wait, current);
      
  69  	__set_current_state(TASK_INTERRUPTIBLE);
      	add_wait_queue_exclusive(sk->sleep, &wait);
      
      	/* Socket errors? */
      	error = sock_error(sk);
  74  	if (error)
  75  		goto out;
      
  77  	if (!skb_queue_empty(&sk->receive_queue))
  78  		goto ready;
      
      	/* Socket shut down? */
  81  	if (sk->shutdown & RCV_SHUTDOWN)
  82  		goto out;
      
      	/* Sequenced packets can come disconnected. If so we report the problem */
      	error = -ENOTCONN;
  86  	if(connection_based(sk) && !(sk->state==TCP_ESTABLISHED || sk->state==TCP_LISTEN))
  87  		goto out;
      
      	/* handle signals */
  90  	if (signal_pending(current))
  91  		goto interrupted;
      
      	*timeo_p = schedule_timeout(*timeo_p);
      
      ready:
      	current->state = TASK_RUNNING;
      	remove_wait_queue(sk->sleep, &wait);
  98  	return 0;
      
      interrupted:
      	error = sock_intr_errno(*timeo_p);
      out:
      	current->state = TASK_RUNNING;
      	remove_wait_queue(sk->sleep, &wait);
      	*err = error;
 106  	return error;
      }
      
      /*
       *	Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible
       *	races. This replaces identical code in packet,raw and udp, as well as the IPX
       *	AX.25 and Appletalk. It also finally fixes the long standing peek and read
       *	race for datagram sockets. If you alter this routine remember it must be
       *	re-entrant.
       *
 *	Historical note: this function no longer locks the socket (see the
 *	ANK note below); the caller just releases the returned skb with
 *	skb_free_datagram() when finished with it.
       *
       *	* It does not lock socket since today. This function is
       *	* free of race conditions. This measure should/can improve
       *	* significantly datagram socket latencies at high loads,
       *	* when data copying to user space takes lots of time.
       *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
       *	*  8) Great win.)
       *	*			                    --ANK (980729)
       *
       *	The order of the tests when we find no data waiting are specified
       *	quite explicitly by POSIX 1003.1g, don't change them without having
       *	the standard around please.
       */
      
/*
 *	Fetch the next datagram from sk's receive queue, honouring
 *	MSG_PEEK and the socket's receive timeout.  On success the skb is
 *	returned with a reference held (peek) or dequeued (normal read);
 *	on failure NULL is returned and *err holds a negative errno.
 */
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err)
{
	int error;
	struct sk_buff *skb;
	long timeo;

	/* Caller is allowed not to check sk->err before skb_recv_datagram() */
	error = sock_error(sk);
	if (error)
		goto no_packet;

	/* 0 when noblock or SO_RCVTIMEO says "don't wait" */
	timeo = sock_rcvtimeo(sk, noblock);

	do {
		/* Again only user level code calls this function, so nothing interrupt level
		   will suddenly eat the receive_queue.

		   Look at current nfs client by the way...
		   However, this function was correct in any case. 8)
		 */
		if (flags & MSG_PEEK)
		{
			unsigned long cpu_flags;

			/* Peek without dequeueing: bump skb->users under the
			   queue lock so a concurrent reader cannot free the
			   skb while we still reference it. */
			spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags);
			skb = skb_peek(&sk->receive_queue);
			if(skb!=NULL)
				atomic_inc(&skb->users);
			spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags);
		} else
			skb = skb_dequeue(&sk->receive_queue);

		if (skb)
			return skb;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

		/* wait_for_packet() returns 0 when the queue should be
		   re-checked; any other value ends the wait and has already
		   stored the result in *err. */
	} while (wait_for_packet(sk, err, &timeo) == 0);

	return NULL;

no_packet:
	*err = error;
	return NULL;
}
      
/*
 *	Release an skb obtained from skb_recv_datagram().  Works for both
 *	the dequeued and the peeked case, since peeking only took an extra
 *	reference on the skb (see skb_recv_datagram); the socket itself is
 *	not touched.
 */
void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
	kfree_skb(skb);
}
      
      /*
       *	Copy a datagram to a linear buffer.
       */
      
 190  int skb_copy_datagram(struct sk_buff *skb, int offset, char *to, int size)
      {
      	int err = -EFAULT;
      
 194  	if (!copy_to_user(to, skb->h.raw + offset, size))
      		err = 0;
 196  	return err;
      }
      
      
      /*
       *	Copy a datagram to an iovec.
       *	Note: the iovec is modified during the copy.
       */
       
/*
 *	Copy 'size' bytes of datagram payload, starting 'offset' bytes past
 *	skb->h.raw, into the user-space iovec 'to'.  The iovec is advanced
 *	in place as data is copied.  Returns 0 on success or a negative
 *	errno propagated from memcpy_toiovec().
 */
int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to,
			    int size)
{
	return memcpy_toiovec(to, skb->h.raw + offset, size);
}
      
      /*
       *	Datagram poll: Again totally generic. This also handles
       *	sequenced packet sockets providing the socket receive queue
       *	is only ever holding data ready to receive.
       *
       *	Note: when you _don't_ use this routine for this protocol,
       *	and you use a different write policy from sock_writeable()
       *	then please supply your own write_space callback.
       */
      
/*
 *	Generic poll() implementation for datagram (and compatible
 *	sequenced-packet) sockets.  Registers on the socket's wait queue
 *	and returns the POLL* event mask currently applicable.
 */
unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	poll_wait(file, sk->sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->err || !skb_queue_empty(&sk->error_queue))
		mask |= POLLERR;
	/* both directions shut down -> hangup */
	if (sk->shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable?  RCV_SHUTDOWN counts as readable so a subsequent
	   recv() can return 0 (EOF) instead of blocking. */
	if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->state==TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet?  Return before the
		   writable check: a still-connecting socket must never
		   report POLLOUT. */
		if (sk->state == TCP_SYN_SENT)
			return mask;
	}

	/* writable?  If not, arm SOCK_ASYNC_NOSPACE so the protocol's
	   write_space callback will notify us when space appears (see
	   the note in the header comment above). */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);

	return mask;
}