/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	@(#)tcp.c	1.0.16	05/25/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@no.unit.nvg>
 *
 * Fixes:	
 *		Alan Cox	:	Numerous verify_area() calls
 *		Alan Cox	:	Set the ACK bit on a reset
 *		Alan Cox	:	Stopped it crashing if it closed while sk->inuse=1
 *					and was trying to connect (tcp_err()).
 *		Alan Cox	:	All icmp error handling was broken
 *					pointers passed where wrong and the
 *					socket was looked up backwards. Nobody
 *					tested any icmp error code obviously.
 *		Alan Cox	:	tcp_err() now handled properly. It wakes people
 *					on errors. select behaves and the icmp error race
 *					has gone by moving it into sock.c
 *		Alan Cox	:	tcp_reset() fixed to work for everything not just
 *					packets for unknown sockets.
 *		Alan Cox	:	tcp option processing.
 *		Alan Cox	:	Reset tweaked (still not 100%) [Had syn rule wrong]
 *		Herp Rosmanith  :	More reset fixes
 *		Alan Cox	:	No longer acks invalid rst frames. Acking
 *					any kind of RST is right out.
 *		Alan Cox	:	Sets an ignore me flag on an rst receive
 *					otherwise odd bits of prattle escape still
 *		Alan Cox	:	Fixed another acking RST frame bug. Should stop
 *					LAN workplace lockups.
 *		Alan Cox	: 	Some tidyups using the new skb list facilities
 *		Alan Cox	:	sk->keepopen now seems to work
 *		Alan Cox	:	Pulls options out correctly on accepts
 *		Alan Cox	:	Fixed assorted sk->rqueue->next errors
 *		Alan Cox	:	PSH doesn't end a TCP read. Switched a bit to skb ops.
 *		Alan Cox	:	Tidied tcp_data to avoid a potential nasty.
 *		Alan Cox	:	Added some better commenting, as the tcp is hard to follow
 *		Alan Cox	:	Removed incorrect check for 20 * psh
 *	Michael O'Reilly	:	ack < copied bug fix.
 *	Johannes Stille		:	Misc tcp fixes (not all in yet).
 *		Alan Cox	:	FIN with no memory -> CRASH
 *		Alan Cox	:	Added socket option proto entries. Also added awareness of them to accept.
 *		Alan Cox	:	Added TCP options (SOL_TCP)
 *		Alan Cox	:	Switched wakeup calls to callbacks, so the kernel can layer network sockets.
 *		Alan Cox	:	Use ip_tos/ip_ttl settings.
 *		Alan Cox	:	Handle FIN (more) properly (we hope).
 *		Alan Cox	:	RST frames sent on unsynchronised state ack error/
 *		Alan Cox	:	Put in missing check for SYN bit.
 *		Alan Cox	:	Added tcp_select_window() aka NET2E 
 *					window non shrink trick.
 *		Alan Cox	:	Added a couple of small NET2E timer fixes
 *		Charles Hedrick :	TCP fixes
 *		Toomas Tamm	:	TCP window fixes
 *		Alan Cox	:	Small URG fix to rlogin ^C ack fight
 *		Charles Hedrick	:	Rewrote most of it to actually work
 *		Linus		:	Rewrote tcp_read() and URG handling
 *					completely
 *		Gerhard Koerting:	Fixed some missing timer handling
 *		Matthew Dillon  :	Reworked TCP machine states as per RFC
 *		Gerhard Koerting:	PC/TCP workarounds
 *		Adam Caldwell	:	Assorted timer/timing errors
 *		Matthew Dillon	:	Fixed another RST bug
 *		Alan Cox	:	Move to kernel side addressing changes.
 *		Alan Cox	:	Beginning work on TCP fastpathing (not yet usable)
 *		Arnt Gulbrandsen:	Turbocharged tcp_check() routine.
 *		Alan Cox	:	TCP fast path debugging
 *		Alan Cox	:	Window clamping
 *		Michael Riepe	:	Bug in tcp_check()
 *		Matt Dillon	:	More TCP improvements and RST bug fixes
 *		Matt Dillon	:	Yet more small nasties remove from the TCP code
 *					(Be very nice to this man if tcp finally works 100%) 8)
 *		Alan Cox	:	BSD accept semantics. 
 *		Alan Cox	:	Reset on closedown bug.
 *	Peter De Schrijver	:	ENOTCONN check missing in tcp_sendto().
 *		Michael Pall	:	Handle select() after URG properly in all cases.
 *		Michael Pall	:	Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
 *		Michael Pall	:	Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
 *		Michael Pall	:	recv(...,MSG_OOB) never blocks in the BSD api.
 *		Alan Cox	:	Changed the semantics of sk->socket to 
 *					fix a race and a signal problem with
 *					accept() and async I/O.
 *		Alan Cox	:	Relaxed the rules on tcp_sendto().
 *		Yury Shevchuk	:	Really fixed accept() blocking problem.
 *		Craig I. Hagan  :	Allow for BSD compatible TIME_WAIT for
 *					clients/servers which listen in on
 *					fixed ports.
 *		Alan Cox	:	Cleaned the above up and shrank it to
 *					a sensible code size.
 *		Alan Cox	:	Self connect lockup fix.
 *		Alan Cox	:	No connect to multicast.
 *		Ross Biro	:	Close unaccepted children on master
 *					socket close.
 *		Alan Cox	:	Reset tracing code.
 *		Alan Cox	:	Spurious resets on shutdown.
 *		Alan Cox	:	Giant 15 minute/60 second timer error
 *		Alan Cox	:	Small whoops in selecting before an accept.
 *		Alan Cox	:	Kept the state trace facility since it's
 *					handy for debugging.
 *		Alan Cox	:	More reset handler fixes.
 *		Alan Cox	:	Started rewriting the code based on the RFC's
 *					for other useful protocol references see:  
 *					Comer, KA9Q NOS, and for a reference on the
 *					difference between specifications and how BSD
 *					works see the 4.4lite source.
 *		A.N.Kuznetsov	:	Don't time wait on completion of tidy 
 *					close.
 *		Linus Torvalds	:	Fin/Shutdown & copied_seq changes.
 *		Linus Torvalds	:	Fixed BSD port reuse to work first syn
 *		Alan Cox	:	Reimplemented timers as per the RFC and using multiple
 *					timers for sanity. 
 *		Alan Cox	:	Small bug fixes, and a lot of new
 *					comments.
 *		Alan Cox	:	Fixed dual reader crash by locking
 *					the buffers (much like datagram.c)
 *		Alan Cox	:	Fixed stuck sockets in probe. A probe
 *					now gets fed up of retrying without
 *					(even a no space) answer.
 *		Alan Cox	:	Extracted closing code better
 *		Alan Cox	:	Fixed the closing state machine to
 *					resemble the RFC.
 *		Alan Cox	:	More 'per spec' fixes.
 *		Alan Cox	:	tcp_data() doesn't ack illegal PSH
 *					only frames. At least one pc tcp stack
 *					generates them.
 *		Mark Yarvis	:	In tcp_read_wakeup(), don't send an
 *					ack if stat is TCP_CLOSED.
 *
 *
 * To Fix:
 *		Fast path the code. Two things here - fix the window calculation
 *		so it doesn't iterate over the queue, also spot packets with no funny
 *		options arriving in order and process directly.
 *
 *		Implement RFC 1191 [Path MTU discovery]
 *		Look at the effect of implementing RFC 1337 suggestions and their impact.
 *		Rewrite output state machine to use a single queue and do low window
 *		situations as per the spec (RFC 1122)
 *		Speed up input assembly algorithm.
 *		RFC1323 - PAWS and window scaling. PAWS is required for IPv6 so we
 *		could do with it working on IPv4
 *		User settable/learned rtt/max window/mtu
 *		Cope with MTU/device switches when retransmitting in tcp.
 *		Fix the window handling to use PR's new code.
 *
 *		Change the fundamental structure to a single send queue maintained
 *		by TCP (removing the bogus ip stuff [thus fixing mtu drops on
 *		active routes too]). Cut the queue off in tcp_retransmit/
 *		tcp_transmit.
 *		Change the receive queue to assemble as it goes. This lets us
 *		dispose of most of tcp_sequence, half of tcp_ack and chunks of
 *		tcp_data/tcp_read as well as the window shrink crud.
 *		Separate out duplicated code - tcp_alloc_skb, tcp_build_ack
 *		tcp_queue_skb seem obvious routines to extract.
 *	
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or(at your option) any later version.
 *
 * Description of States:
 *
 *	TCP_SYN_SENT		sent a connection request, waiting for ack
 *
 *	TCP_SYN_RECV		received a connection request, sent ack,
 *				waiting for final ack in three-way handshake.
 *
 *	TCP_ESTABLISHED		connection established
 *
 *	TCP_FIN_WAIT1		our side has shutdown, waiting to complete
 *				transmission of remaining buffered data
 *
 *	TCP_FIN_WAIT2		all buffered data sent, waiting for remote
 *				to shutdown
 *
 *	TCP_CLOSING		both sides have shutdown but we still have
 *				data we have to finish sending
 *
 *	TCP_TIME_WAIT		timeout to catch resent junk before entering
 *				closed, can only be entered from FIN_WAIT2
 *				or CLOSING.  Required because the other end
 *				may not have gotten our last ACK causing it
 *				to retransmit the data packet (which we ignore)
 *
 *	TCP_CLOSE_WAIT		remote side has shutdown and is waiting for
 *				us to finish writing our data and to shutdown
 *				(we have to close() to move on to LAST_ACK)
 *
 *	TCP_LAST_ACK		out side has shutdown after remote has
 *				shutdown.  There may still be data in our
 *				buffer that we have to finish sending
 *		
 *	TCP_CLOSE		socket is finished
 */

#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/config.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/termios.h>
#include <linux/in.h>
#include <linux/fcntl.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include "snmp.h"
#include "ip.h"
#include "protocol.h"
#include "icmp.h"
#include "tcp.h"
#include "arp.h"
#include <linux/skbuff.h>
#include "sock.h"
#include "route.h"
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/segment.h>
#include <linux/mm.h>

/*
 *	The MSL timer is the 'normal' timer.
 */
 
#define reset_msl_timer(x,y,z)	reset_timer(x,y,z)

#define SEQ_TICK 3
unsigned long seq_offset;
struct tcp_mib	tcp_statistics;

static void tcp_close(struct sock *sk, int timeout);


/*
 *	The less said about this the better, but it works and will do for 1.2 
 */

static struct wait_queue *master_select_wakeup;

static __inline__ int min(unsigned int a, unsigned int b)
{
	if (a < b) 
		return(a);
	return(b);
}

#undef STATE_TRACE

#ifdef STATE_TRACE
static char *statename[]={
	"Unused","Established","Syn Sent","Syn Recv",
	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
	"Close Wait","Last ACK","Listen","Closing"
};
#endif

static __inline__ void tcp_set_state(struct sock *sk, int state)
{
	if(sk->state==TCP_ESTABLISHED)
		tcp_statistics.TcpCurrEstab--;
#ifdef STATE_TRACE
	if(sk->debug)
		printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]);
#endif	
	/* This is a hack but it doesn't occur often and it's going to
	   be a real        to fix nicely */
	   
	if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV)
	{
		wake_up_interruptible(&master_select_wakeup);
	}
	sk->state=state;
	if(state==TCP_ESTABLISHED)
		tcp_statistics.TcpCurrEstab++;
}

/*
 *	This routine picks a TCP windows for a socket based on
 *	the following constraints
 *  
 *	1. The window can never be shrunk once it is offered (RFC 793)
 *	2. We limit memory per socket
 *   
 *	For now we use NET2E3's heuristic of offering half the memory
 *	we have handy. All is not as bad as this seems however because
 *	of two things. Firstly we will bin packets even within the window
 *	in order to get the data we are waiting for into the memory limit.
 *	Secondly we bin common duplicate forms at receive time
 *      Better heuristics welcome
 */
   
int tcp_select_window(struct sock *sk)
{
	int new_window = sk->prot->rspace(sk);
	
	if(sk->window_clamp)
		new_window=min(sk->window_clamp,new_window);
	/*
	 * 	Two things are going on here.  First, we don't ever offer a
	 * 	window less than min(sk->mss, MAX_WINDOW/2).  This is the
	 * 	receiver side of SWS as specified in RFC1122.
	 * 	Second, we always give them at least the window they
	 * 	had before, in order to avoid retracting window.  This
	 * 	is technically allowed, but RFC1122 advises against it and
	 * 	in practice it causes trouble.
	 *
	 * 	Fixme: This doesn't correctly handle the case where
	 *	new_window > sk->window but not by enough to allow for the
	 *	shift in sequence space. 
	 */
	if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
		return(sk->window);
	return(new_window);
}

/*
 *	Find someone to 'accept'. Must be called with
 *	sk->inuse=1 or cli()
 */ 

static struct sk_buff *tcp_find_established(struct sock *s)
{
	struct sk_buff *p=skb_peek(&s->receive_queue);
	if(p==NULL)
		return NULL;
	do
	{
		if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
			return p;
		p=p->next;
	}
	while(p!=(struct sk_buff *)&s->receive_queue);
	return NULL;
}

/*
 *	Remove a completed connection and return it. This is used by
 *	tcp_accept() to get connections from the queue.
 */

static struct sk_buff *tcp_dequeue_established(struct sock *s)
{
	struct sk_buff *skb;
	unsigned long flags;
	save_flags(flags);
	cli(); 
	skb=tcp_find_established(s);
	if(skb!=NULL)
		skb_unlink(skb);	/* Take it off the queue */
	restore_flags(flags);
	return skb;
}

/* 
 *	This routine closes sockets which have been at least partially
 *	opened, but not yet accepted. Currently it is only called by
 *	tcp_close, and timeout mirrors the value there. 
 */

static void tcp_close_pending (struct sock *sk) 
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) 
	{
		skb->sk->dead=1;
		tcp_close(skb->sk, 0);
		kfree_skb(skb, FREE_READ);
	}
	return;
}

/*
 *	Enter the time wait state. 
 */

static void tcp_time_wait(struct sock *sk)
{
	tcp_set_state(sk,TCP_TIME_WAIT);
	sk->shutdown = SHUTDOWN_MASK;
	if (!sk->dead)
		sk->state_change(sk);
	reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
}

/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 */

void tcp_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;
	int ct=0;

	prot = sk->prot;
	skb = sk->send_head;

	while (skb != NULL)
	{
		struct tcphdr *th;
		struct iphdr *iph;
		int size;

		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;

		/*
		 * In general it's OK just to use the old packet.  However we
		 * need to use the current ack and window fields.  Urg and
		 * urg_ptr could possibly stand to be updated as well, but we
		 * don't keep the necessary data.  That shouldn't be a problem,
		 * if the other end is doing the right thing.  Since we're
		 * changing the packet, we have to issue a new IP identifier.
		 */

		iph = (struct iphdr *)(skb->data + dev->hard_header_len);
		th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
		size = skb->len - (((unsigned char *) th) - skb->data);
		
		/*
		 *	Note: We ought to check for window limits here but
		 *	currently this is done (less efficiently) elsewhere.
		 *	We do need to check for a route change but can't handle
		 *	that until we have the new 1.3.x buffers in.
		 *
		 */

		iph->id = htons(ip_id_count++);
		ip_send_check(iph);

		/*
		 *	This is not the right way to handle this. We have to
		 *	issue an up to date window and ack report with this 
		 *	retransmit to keep the odd buggy tcp that relies on 
		 *	the fact BSD does this happy. 
		 *	We don't however need to recalculate the entire 
		 *	checksum, so someone wanting a small problem to play
		 *	with might like to implement RFC1141/RFC1624 and speed
		 *	this up by avoiding a full checksum.
		 */
		 
		th->ack_seq = ntohl(sk->acked_seq);
		th->window = ntohs(tcp_select_window(sk));
		tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
		
		/*
		 *	If the interface is (still) up and running, kick it.
		 */

		if (dev->flags & IFF_UP)
		{
			/*
			 *	If the packet is still being sent by the device/protocol
			 *	below then don't retransmit. This is both needed, and good -
			 *	especially with connected mode AX.25 where it stops resends
			 *	occurring of an as yet unsent anyway frame!
			 *	We still add up the counts as the round trip time wants
			 *	adjusting.
			 */
			if (sk && !skb_device_locked(skb))
			{
				/* Remove it from any existing driver queue first! */
				skb_unlink(skb);
				/* Now queue it */
				ip_statistics.IpOutRequests++;
				dev_queue_xmit(skb, dev, sk->priority);
			}
		}

		/*
		 *	Count retransmissions
		 */
		 
		ct++;
		sk->prot->retransmits ++;

		/*
		 *	Only one retransmit requested.
		 */
	
		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */

		if (ct >= sk->cong_window)
			break;
		skb = skb->link3;
	}
}

/*
 *	Reset the retransmission timer
 */
 
static void reset_xmit_timer(struct sock *sk, int why, unsigned long when)
{
	del_timer(&sk->retransmit_timer);
	sk->ip_xmit_timeout = why;
	if((int)when < 0)
	{
		when=3;
		printk("Error: Negative timer in xmit_timer\n");
	}
	sk->retransmit_timer.expires=when;
	add_timer(&sk->retransmit_timer);
}

/*
 * 	This is the normal code called for timeouts.  It does the retransmission
 * 	and then does backoff.  tcp_do_retransmit is separated out because
 * 	tcp_ack needs to send stuff from the retransmit queue without
 * 	initiating a backoff.
 */


void tcp_retransmit_time(struct sock *sk, int all)
{
	tcp_do_retransmit(sk, all);

	/*
	 * Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */

	sk->retransmits++;
	sk->backoff++;
	sk->rto = min(sk->rto << 1, 120*HZ);
	reset_xmit_timer(sk, TIME_WRITE, sk->rto);
}


/*
 *	A timer event has trigger a tcp retransmit timeout. The
 *	socket xmit queue is ready and set up to send. Because
 *	the ack receive code keeps the queue straight we do
 *	nothing clever here.
 */

static void tcp_retransmit(struct sock *sk, int all)
{
	if (all) 
	{
		tcp_retransmit_time(sk, all);
		return;
	}

	sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
	/* sk->ssthresh in theory can be zero.  I guess that's OK */
	sk->cong_count = 0;

	sk->cong_window = 1;

	/* Do the actual retransmit. */
	tcp_retransmit_time(sk, all);
}

/*
 *	A write timeout has occurred. Process the after effects.
 */

static int tcp_write_timeout(struct sock *sk)
{
	/*
	 *	Look for a 'soft' timeout.
	 */
	if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
		|| (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) 
	{
		/*
		 *	Attempt to recover if arp has changed (unlikely!) or
		 *	a route has shifted (not supported prior to 1.3).
		 */
		arp_destroy (sk->daddr, 0);
		ip_route_check (sk->daddr);
	}
	/*
	 *	Has it gone just too far ?
	 */
	if (sk->retransmits > TCP_RETR2) 
	{
		sk->err = ETIMEDOUT;
		sk->error_report(sk);
		del_timer(&sk->retransmit_timer);
		/*
		 *	Time wait the socket 
		 */
		if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) 
		{
			tcp_set_state(sk,TCP_TIME_WAIT);
			reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
		}
		else
		{
			/*
			 *	Clean up time.
			 */
			tcp_set_state(sk, TCP_CLOSE);
			return 0;
		}
	}
	return 1;
}

/*
 *	The TCP retransmit timer. This lacks a few small details.
 *
 *	1. 	An initial rtt timeout on the probe0 should cause what we can
 *		of the first write queue buffer to be split and sent.
 *	2.	On a 'major timeout' as defined by RFC1122 we shouldn't report
 *		ETIMEDOUT if we know an additional 'soft' error caused this.
 *		tcp_err should save a 'soft error' for us.
 */

static void retransmit_timer(unsigned long data)
{
	struct sock *sk = (struct sock*)data;
	int why = sk->ip_xmit_timeout;

	/* 
	 * only process if socket is not in use
	 */

	cli();
	if (sk->inuse || in_bh) 
	{
		/* Try again in 1 second */
		sk->retransmit_timer.expires = HZ;
		add_timer(&sk->retransmit_timer);
		sti();
		return;
	}

	sk->inuse = 1;
	sti();

	/* Always see if we need to send an ack. */

	if (sk->ack_backlog && !sk->zapped) 
	{
		sk->prot->read_wakeup (sk);
		if (! sk->dead)
			sk->data_ready(sk,0);
	}

	/* Now we need to figure out why the socket was on the timer. */

	switch (why) 
	{
		/* Window probing */
		case TIME_PROBE0:
			tcp_send_probe0(sk);
			tcp_write_timeout(sk);
			break;
		/* Retransmitting */
		case TIME_WRITE:
			/* It could be we got here because we needed to send an ack.
			 * So we need to check for that.
			 */
		{
			struct sk_buff *skb;
			unsigned long flags;

			save_flags(flags);
			cli();
			skb = sk->send_head;
			if (!skb) 
			{
				restore_flags(flags);
			} 
			else 
			{
				/*
				 *	Kicked by a delayed ack. Reset timer
				 *	correctly now
				 */
				if (jiffies < skb->when + sk->rto) 
				{
					reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies);
					restore_flags(flags);
					break;
				}
				restore_flags(flags);
				/*
				 *	Retransmission
				 */
				sk->prot->retransmit (sk, 0);
				tcp_write_timeout(sk);
			}
			break;
		}
		/* Sending Keepalives */
		case TIME_KEEPOPEN:
			/* 
			 * this reset_timer() call is a hack, this is not
			 * how KEEPOPEN is supposed to work.
			 */
			reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);

			/* Send something to keep the connection open. */
			if (sk->prot->write_wakeup)
				  sk->prot->write_wakeup (sk);
			sk->retransmits++;
			tcp_write_timeout(sk);
			break;
		default:
			printk ("rexmit_timer: timer expired - reason unknown\n");
			break;
	}
	release_sock(sk);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 */

void tcp_err(int err, unsigned char *header, unsigned long daddr,
	unsigned long saddr, struct inet_protocol *protocol)
{
	struct tcphdr *th;
	struct sock *sk;
	struct iphdr *iph=(struct iphdr *)header;
  
	header+=4*iph->ihl;
   

	th =(struct tcphdr *)header;
	sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);

	if (sk == NULL) 
		return;
  
	if(err<0)
	{
	  	sk->err = -err;
	  	sk->error_report(sk);
	  	return;
	}

	if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
	{
		/*
		 * FIXME:
		 * For now we will just trigger a linear backoff.
		 * The slow start code should cause a real backoff here.
		 */
		if (sk->cong_window > 4)
			sk->cong_window--;
		return;
	}

/*	sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */

	/*
	 * If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 */

	if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
	{
		if (sk->state == TCP_SYN_SENT) 
		{
			tcp_statistics.TcpAttemptFails++;
			tcp_set_state(sk,TCP_CLOSE);
			sk->error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
		}
		sk->err = icmp_err_convert[err & 0xff].errno;		
	}
	return;
}


/*
 *	Walk down the receive queue counting readable data until we hit the end or we find a gap
 *	in the received data queue (ie a frame missing that needs sending to us). Not
 *	sorting using two queues as data arrives makes life so much harder.
 */

static int tcp_readable(struct sock *sk)
{
	unsigned long counted;
	unsigned long amount;
	struct sk_buff *skb;
	int sum;
	unsigned long flags;

	if(sk && sk->debug)
	  	printk("tcp_readable: %p - ",sk);

	save_flags(flags);
	cli();
	if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
	{
		restore_flags(flags);
	  	if(sk && sk->debug) 
	  		printk("empty\n");
	  	return(0);
	}
  
	counted = sk->copied_seq;	/* Where we are at the moment */
	amount = 0;
  
	/* 
	 *	Do until a push or until we are out of data. 
	 */
	 
	do 
	{
		if (before(counted, skb->h.th->seq)) 	/* Found a hole so stops here */
			break;
		sum = skb->len -(counted - skb->h.th->seq);	/* Length - header but start from where we are up to (avoid overlaps) */
		if (skb->h.th->syn)
			sum++;
		if (sum > 0) 
		{					/* Add it up, move on */
			amount += sum;
			if (skb->h.th->syn) 
				amount--;
			counted += sum;
		}
		/*
		 * Don't count urg data ... but do it in the right place!
		 * Consider: "old_data (ptr is here) URG PUSH data"
		 * The old code would stop at the first push because
		 * it counted the urg (amount==1) and then does amount--
		 * *after* the loop.  This means tcp_readable() always
		 * returned zero if any URG PUSH was in the queue, even
		 * though there was normal data available. If we subtract
		 * the urg data right here, we even get it to work for more
		 * than one URG PUSH skb without normal data.
		 * This means that select() finally works now with urg data
		 * in the queue.  Note that rlogin was never affected
		 * because it doesn't use select(); it uses two processes
		 * and a blocking read().  And the queue scan in tcp_read()
		 * was correct.  Mike <pall@rz.uni-karlsruhe.de>
		 */
		if (skb->h.th->urg)
			amount--;	/* don't count urg data */
		if (amount && skb->h.th->psh) break;
		skb = skb->next;
	}
	while(skb != (struct sk_buff *)&sk->receive_queue);

	restore_flags(flags);
	if(sk->debug)
	  	printk("got %lu bytes.\n",amount);
	return(amount);
}

/*
 * LISTEN is a special case for select..
 */
static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait)
{
	if (sel_type == SEL_IN) {
		int retval;

		sk->inuse = 1;
		retval = (tcp_find_established(sk) != NULL);
		release_sock(sk);
		if (!retval)
			select_wait(&master_select_wakeup,wait);
		return retval;
	}
	return 0;
}


/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to set "sk->inuse", as the upper select layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
{
	if (sk->state == TCP_LISTEN)
		return tcp_listen_select(sk, sel_type, wait);

	switch(sel_type) {
	case SEL_IN:
		if (sk->err)
			return 1;
		if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
			break;

		if (sk->shutdown & RCV_SHUTDOWN)
			return 1;
			
		if (sk->acked_seq == sk->copied_seq)
			break;

		if (sk->urg_seq != sk->copied_seq ||
		    sk->acked_seq != sk->copied_seq+1 ||
		    sk->urginline || !sk->urg_data)
			return 1;
		break;

	case SEL_OUT:
		if (sk->err)
			return 1;
		if (sk->shutdown & SEND_SHUTDOWN) 
			return 0;
		if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
			break;
		/*
		 * This is now right thanks to a small fix
		 * by Matt Dillon.
		 */

		if (sk->prot->wspace(sk) < sk->mtu+128+sk->prot->max_header)
			break;
		return 1;

	case SEL_EX:
		if (sk->urg_data)
			return 1;
		break;
	}
	select_wait(sk->sleep, wait);
	return 0;
}

int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int err;
	switch(cmd) 
	{

		case TIOCINQ:
#ifdef FIXME	/* FIXME: */
		case FIONREAD:
#endif
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) 
				return(-EINVAL);

			sk->inuse = 1;
			amount = tcp_readable(sk);
			release_sock(sk);
			err=verify_area(VERIFY_WRITE,(void *)arg,
						   sizeof(unsigned long));
			if(err)
				return err;
			put_fs_long(amount,(unsigned long *)arg);
			return(0);
		}
		case SIOCATMARK:
		{
			int answ = sk->urg_data && sk->urg_seq == sk->copied_seq;

			err = verify_area(VERIFY_WRITE,(void *) arg,
						  sizeof(unsigned long));
			if (err)
				return err;
			put_fs_long(answ,(int *) arg);
			return(0);
		}
		case TIOCOUTQ:
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) return(-EINVAL);
			amount = sk->prot->wspace(sk);
			err=verify_area(VERIFY_WRITE,(void *)arg,
						   sizeof(unsigned long));
			if(err)
				return err;
			put_fs_long(amount,(unsigned long *)arg);
			return(0);
		}
		default:
			return(-EINVAL);
	}
}


/*
 *	This routine computes a TCP checksum. 
 */
 
unsigned short tcp_check(struct tcphdr *th, int len,
	  unsigned long saddr, unsigned long daddr)
{     
	unsigned long sum;
   
	if (saddr == 0) saddr = ip_my_addr();

/*
 * stupid, gcc complains when I use just one __asm__ block,
 * something about too many reloads, but this is just two
 * instructions longer than what I want
 */
	__asm__("
	    addl %%ecx, %%ebx
	    adcl %%edx, %%ebx
	    adcl $0, %%ebx
	    "
	: "=b"(sum)
	: "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
	: "bx", "cx", "dx" );
	__asm__("
	    movl %%ecx, %%edx
	    cld
	    cmpl $32, %%ecx
	    jb 2f
	    shrl $5, %%ecx
	    clc
1:	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    lodsl
	    adcl %%eax, %%ebx
	    loop 1b
	    adcl $0, %%ebx
	    movl %%edx, %%ecx
2:	    andl $28, %%ecx
	    je 4f
	    shrl $2, %%ecx
	    clc
3:	    lodsl
	    adcl %%eax, %%ebx
	    loop 3b
	    adcl $0, %%ebx
4:	    movl $0, %%eax
	    testw $2, %%dx
	    je 5f
	    lodsw
	    addl %%eax, %%ebx
	    adcl $0, %%ebx
	    movw $0, %%ax
5:	    test $1, %%edx
	    je 6f
	    lodsb
	    addl %%eax, %%ebx
	    adcl $0, %%ebx
6:	    movl %%ebx, %%eax
	    shrl $16, %%eax
	    addw %%ax, %%bx
	    adcw $0, %%bx
	    "
	: "=b"(sum)
	: "0"(sum), "c"(len), "S"(th)
	: "ax", "bx", "cx", "dx", "si" );

  	/* We only want the bottom 16 bits, but we never cleared the top 16. */
  
  	return((~sum) & 0xffff);
}


void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
		unsigned long daddr, int len, struct sock *sk)
{
	th->check = 0;
	th->check = tcp_check(th, len, saddr, daddr);
	return;
}

/*
 *	This is the main buffer sending routine. We queue the buffer
 *	having checked it is sane seeming.
 */
 
static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
	int size;
	struct tcphdr * th = skb->h.th;

	/*
	 *	length of packet (not counting length of pre-tcp headers) 
	 */
	 
	size = skb->len - ((unsigned char *) th - skb->data);

	/*
	 *	Sanity check it.. 
	 */
	 
	if (size < sizeof(struct tcphdr) || size > skb->len) 
	{
		printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
			skb, skb->data, th, skb->len);
		kfree_skb(skb, FREE_WRITE);
		return;
	}

	/*
	 *	If we have queued a header size packet.. (these crash a few
	 *	tcp stacks if ack is not set)
	 */
	 
	if (size == sizeof(struct tcphdr)) 
	{
		/* If it's got a syn or fin it's notionally included in the size..*/
		if(!th->syn && !th->fin) 
		{
			printk("tcp_send_skb: attempt to queue a bogon.\n");
			kfree_skb(skb,FREE_WRITE);
			return;
		}
	}

	/*
	 *	Actual processing.
	 */
	 
	tcp_statistics.TcpOutSegs++;  
	skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
	
	/*
	 *	We must queue if
	 *
	 *	a) The right edge of this frame exceeds the window
	 *	b) We are retransmitting (Nagle's rule)
	 *	c) We have too many packets 'in flight'
	 */
	 
	if (after(skb->h.seq, sk->window_seq) ||
	    (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
	     sk->packets_out >= sk->cong_window) 
	{
		/* checksum will be supplied by tcp_write_xmit.  So
		 * we shouldn't need to set it at all.  I'm being paranoid */
		th->check = 0;
		if (skb->next != NULL) 
		{
			printk("tcp_send_partial: next != NULL\n");
			skb_unlink(skb);
		}
		skb_queue_tail(&sk->write_queue, skb);
		
		/*
		 *	If we don't fit we have to start the zero window
		 *	probes. This is broken - we really need to do a partial
		 *	send _first_ (This is what causes the Cisco and PC/TCP
		 *	grief).
		 */
		 
		if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
		    sk->send_head == NULL && sk->ack_backlog == 0)
			reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
	} 
	else 
	{
		/*
		 *	This is going straight out
		 */
		 
		th->ack_seq = ntohl(sk->acked_seq);
		th->window = ntohs(tcp_select_window(sk));

		tcp_send_check(th, sk->saddr, sk->daddr, size, sk);

		sk->sent_seq = sk->write_seq;
		
		/*
		 *	This is mad. The tcp retransmit queue is put together
		 *	by the ip layer. This causes half the problems with
		 *	unroutable FIN's and other things.
		 */
		 
		sk->prot->queue_xmit(sk, skb->dev, skb, 0);
		
		/*
		 *	Set for next retransmit based on expected ACK time.
		 *	FIXME: We set this every time which means our 
		 *	retransmits are really about a window behind.
		 */

		reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}

/*
 *	Locking problems lead us to a messy situation where we can have
 *	multiple partially complete buffers queued up. This is really bad
 *	as we don't want to be sending partial buffers. Fix this with
 *	a semaphore or similar to lock tcp_write per socket.
 *
 *	These routines are pretty self descriptive.
 */
 
struct sk_buff * tcp_dequeue_partial(struct sock * sk)
{
	struct sk_buff * skb;
	unsigned long flags;

	save_flags(flags);
	cli();
	skb = sk->partial;
	if (skb) {
		sk->partial = NULL;
		del_timer(&sk->partial_timer);
	}
	restore_flags(flags);
	return skb;
}

/*
 *	Empty the partial queue
 */
 
static void tcp_send_partial(struct sock *sk)
{
	struct sk_buff *skb;

	if (sk == NULL)
		return;
	while ((skb = tcp_dequeue_partial(sk)) != NULL)
		tcp_send_skb(sk, skb);
}

/*
 *	Queue a partial frame
 */
 
void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
{
	struct sk_buff * tmp;
	unsigned long flags;

	save_flags(flags);
	cli();
	tmp = sk->partial;
	if (tmp)
		del_timer(&sk->partial_timer);
	sk->partial = skb;
	init_timer(&sk->partial_timer);
	/*
	 *	Wait up to 1 second for the buffer to fill.
	 */
	sk->partial_timer.expires = HZ;
	sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
	sk->partial_timer.data = (unsigned long) sk;
	add_timer(&sk->partial_timer);
	restore_flags(flags);
	if (tmp)
		tcp_send_skb(sk, tmp);
}


/*
 *	This routine sends an ack and also updates the window. 
 */
 
static void tcp_send_ack(unsigned long sequence, unsigned long ack,
	     struct sock *sk,
	     struct tcphdr *th, unsigned long daddr)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct device *dev = NULL;
	int tmp;

	if(sk->zapped)
		return;		/* We have been reset, we may not send again */
		
	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL) 
	{
		/* 
		 *	Force it to send an ack. We don't have to do this
		 *	(ACK is unreliable) but it's much better use of 
		 *	bandwidth on slow links to send a spare ack than
		 *	resend packets. 
		 */
		 
		sk->ack_backlog++;
		if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) 
		{
			reset_xmit_timer(sk, TIME_WRITE, HZ);
		}
		return;
	}

	/*
	 *	Assemble a suitable TCP frame
	 */
	 
	buff->len = sizeof(struct tcphdr);
	buff->sk = sk;
	buff->localroute = sk->localroute;
	t1 =(struct tcphdr *) buff->data;

	/* 
	 *	Put in the IP header and routing stuff. 
	 */
	 
	tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
				IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (tmp < 0) 
	{
  		buff->free = 1;
		sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
		return;
	}
	buff->len += tmp;
	t1 =(struct tcphdr *)((char *)t1 +tmp);

	memcpy(t1, th, sizeof(*t1));

	/*
	 *	Swap the send and the receive. 
	 */
	 
	t1->dest = th->source;
	t1->source = th->dest;
	t1->seq = ntohl(sequence);
	t1->ack = 1;
	sk->window = tcp_select_window(sk);
	t1->window = ntohs(sk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->syn = 0;
	t1->psh = 0;
	t1->fin = 0;
	
	/*
	 *	If we have nothing queued for transmit and the transmit timer
	 *	is on we are just doing an ACK timeout and need to switch
	 *	to a keepalive.
	 */
	 
	if (ack == sk->acked_seq) 
	{
		sk->ack_backlog = 0;
		sk->bytes_rcv = 0;
		sk->ack_timed = 0;
		if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
				  && sk->ip_xmit_timeout == TIME_WRITE) 
		{
			if(sk->keepopen) {
				reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
			} else {
				delete_timer(sk);
			}
		}
  	}
  	
  	/*
  	 *	Fill in the packet and send it
  	 */
  	 
  	t1->ack_seq = ntohl(ack);
  	t1->doff = sizeof(*t1)/4;
  	tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
  	if (sk->debug)
  		 printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
  	tcp_statistics.TcpOutSegs++;
  	sk->prot->queue_xmit(sk, dev, buff, 1);
}


/* 
 *	This routine builds a generic TCP header. 
 */
 
extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
{

	memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
	th->seq = htonl(sk->write_seq);
	th->psh =(push == 0) ? 1 : 0;
	th->doff = sizeof(*th)/4;
	th->ack = 1;
	th->fin = 0;
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	sk->ack_timed = 0;
	th->ack_seq = htonl(sk->acked_seq);
	sk->window = tcp_select_window(sk);
	th->window = htons(sk->window);

	return(sizeof(*th));
}

/*
 *	This routine copies from a user buffer into a socket,
 *	and starts the transmit system.
 */

static int tcp_write(struct sock *sk, unsigned char *from,
	  int len, int nonblock, unsigned flags)
{
	int copied = 0;
	int copy;
	int tmp;
	struct sk_buff *skb;
	struct sk_buff *send_tmp;
	unsigned char *buff;
	struct proto *prot;
	struct device *dev = NULL;

	sk->inuse=1;
	prot = sk->prot;
	while(len > 0) 
	{
		if (sk->err) 
		{			/* Stop on an error */
			release_sock(sk);
			if (copied) 
				return(copied);
			tmp = -sk->err;
			sk->err = 0;
			return(tmp);
		}

		/*
		 *	First thing we do is make sure that we are established. 
		 */
	
		if (sk->shutdown & SEND_SHUTDOWN) 
		{
			release_sock(sk);
			sk->err = EPIPE;
			if (copied) 
				return(copied);
			sk->err = 0;
			return(-EPIPE);
		}

		/* 
		 *	Wait for a connection to finish.
		 */
	
		while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
		{
			if (sk->err) 
			{
				release_sock(sk);
				if (copied) 
					return(copied);
				tmp = -sk->err;
				sk->err = 0;
				return(tmp);
			}

			if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
			{
				release_sock(sk);
				if (copied) 
					return(copied);

				if (sk->err) 
				{
					tmp = -sk->err;
					sk->err = 0;
					return(tmp);
				}

				if (sk->keepopen) 
				{
					send_sig(SIGPIPE, current, 0);
				}
				return(-EPIPE);
			}

			if (nonblock || copied) 
			{
				release_sock(sk);
				if (copied) 
					return(copied);
				return(-EAGAIN);
			}

			release_sock(sk);
			cli();
		
			if (sk->state != TCP_ESTABLISHED &&
		    		sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
		    	{
				interruptible_sleep_on(sk->sleep);
				if (current->signal & ~current->blocked) 
				{
					sti();
					if (copied) 
						return(copied);
					return(-ERESTARTSYS);
				}
			}
			sk->inuse = 1;
			sti();
		}

	/*
	 * The following code can result in copy <= if sk->mss is ever
	 * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
	 * sk->mtu is constant once SYN processing is finished.  I.e. we
	 * had better not get here until we've seen his SYN and at least one
	 * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
	 * But ESTABLISHED should guarantee that.  sk->max_window is by definition
	 * non-decreasing.  Note that any ioctl to set user_mss must be done
	 * before the exchange of SYN's.  If the initial ack from the other
	 * end has a window of 0, max_window and thus mss will both be 0.
	 */

	/* 
	 *	Now we need to check if we have a half built packet. 
	 */

		if ((skb = tcp_dequeue_partial(sk)) != NULL) 
		{
		        int hdrlen;

		         /* IP header + TCP header */
			hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
			         + sizeof(struct tcphdr);
	
			/* Add more stuff to the end of skb->len */
			if (!(flags & MSG_OOB)) 
			{
				copy = min(sk->mss - (skb->len - hdrlen), len);
				/* FIXME: this is really a bug. */
				if (copy <= 0) 
				{
			  		printk("TCP: **bug**: \"copy\" <= 0!!\n");
			  		copy = 0;
				}
	  
				memcpy_fromfs(skb->data + skb->len, from, copy);
				skb->len += copy;
				from += copy;
				copied += copy;
				len -= copy;
				sk->write_seq += copy;
			}
			if ((skb->len - hdrlen) >= sk->mss ||
				(flags & MSG_OOB) || !sk->packets_out)
				tcp_send_skb(sk, skb);
			else
				tcp_enqueue_partial(skb, sk);
			continue;
		}

	/*
	 * We also need to worry about the window.
 	 * If window < 1/2 the maximum window we've seen from this
 	 *   host, don't use it.  This is sender side
 	 *   silly window prevention, as specified in RFC1122.
 	 *   (Note that this is different than earlier versions of
 	 *   SWS prevention, e.g. RFC813.).  What we actually do is 
	 *   use the whole MSS.  Since the results in the right
	 *   edge of the packet being outside the window, it will
	 *   be queued for later rather than sent.
	 */

		copy = sk->window_seq - sk->write_seq;
		if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
			copy = sk->mss;
		if (copy > len)
			copy = len;

	/*
	 *	We should really check the window here also. 
	 */
	 
		send_tmp = NULL;
		if (copy < sk->mss && !(flags & MSG_OOB)) 
		{
			/*
			 *	We will release the socket in case we sleep here. 
			 */
			release_sock(sk);
			/*
			 *	NB: following must be mtu, because mss can be increased.
			 *	mss is always <= mtu 
			 */
			skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
			sk->inuse = 1;
			send_tmp = skb;
		} 
		else 
		{
			/*
			 *	We will release the socket in case we sleep here. 
			 */
			release_sock(sk);
			skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
  			sk->inuse = 1;
		}

		/*
		 *	If we didn't get any memory, we need to sleep. 
		 */

		if (skb == NULL) 
		{
			sk->socket->flags |= SO_NOSPACE;
			if (nonblock) 
			{
				release_sock(sk);
				if (copied) 
					return(copied);
				return(-EAGAIN);
			}

			/*
			 *	FIXME: here is another race condition. 
			 */

			tmp = sk->wmem_alloc;
			release_sock(sk);
			cli();
			/*
			 *	Again we will try to avoid it. 
			 */
			if (tmp <= sk->wmem_alloc &&
				  (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
				&& sk->err == 0) 
			{
				sk->socket->flags &= ~SO_NOSPACE;
				interruptible_sleep_on(sk->sleep);
				if (current->signal & ~current->blocked) 
				{
					sti();
					if (copied) 
						return(copied);
					return(-ERESTARTSYS);
				}
			}
			sk->inuse = 1;
			sti();
			continue;
		}

		skb->len = 0;
		skb->sk = sk;
		skb->free = 0;
		skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
	
		buff = skb->data;
	
		/*
		 * FIXME: we need to optimize this.
		 * Perhaps some hints here would be good.
		 */
		
		tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
				 IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
		if (tmp < 0 ) 
		{
			prot->wfree(sk, skb->mem_addr, skb->mem_len);
			release_sock(sk);
			if (copied) 
				return(copied);
			return(tmp);
		}
		skb->len += tmp;
		skb->dev = dev;
		buff += tmp;
		skb->h.th =(struct tcphdr *) buff;
		tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
		if (tmp < 0) 
		{
			prot->wfree(sk, skb->mem_addr, skb->mem_len);
			release_sock(sk);
			if (copied) 
				return(copied);
			return(tmp);
		}

		if (flags & MSG_OOB) 
		{
			((struct tcphdr *)buff)->urg = 1;
			((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
		}
		skb->len += tmp;
		memcpy_fromfs(buff+tmp, from, copy);

		from += copy;
		copied += copy;
		len -= copy;
		skb->len += copy;
		skb->free = 0;
		sk->write_seq += copy;
	
		if (send_tmp != NULL && sk->packets_out) 
		{
			tcp_enqueue_partial(send_tmp, sk);
			continue;
		}
		tcp_send_skb(sk, skb);
	}
	sk->err = 0;

/*
 *	Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
 *	interactive fast network servers. It's meant to be on and
 *	it really improves the throughput though not the echo time
 *	on my slow slip link - Alan
 */

/*
 *	Avoid possible race on send_tmp - c/o Johannes Stille 
 */
 
	if(sk->partial && ((!sk->packets_out) 
     /* If not nagling we can send on the before case too.. */
	      || (sk->nonagle && before(sk->write_seq , sk->window_seq))
      	))
  		tcp_send_partial(sk);

	release_sock(sk);
	return(copied);
}

/*
 *	This is just a wrapper. 
 */

static int tcp_sendto(struct sock *sk, unsigned char *from,
	   int len, int nonblock, unsigned flags,
	   struct sockaddr_in *addr, int addr_len)
{
	if (flags & ~(MSG_OOB|MSG_DONTROUTE))
		return -EINVAL;
	if (sk->state == TCP_CLOSE)
		return -ENOTCONN;
	if (addr_len < sizeof(*addr))
		return -EINVAL;
	if (addr->sin_family && addr->sin_family != AF_INET) 
		return -EINVAL;
	if (addr->sin_port != sk->dummy_th.dest) 
		return -EISCONN;
	if (addr->sin_addr.s_addr != sk->daddr) 
		return -EISCONN;
	return tcp_write(sk, from, len, nonblock, flags);
}


/*
 *	Send an ack if one is backlogged at this point. Ought to merge
 *	this with tcp_send_ack().
 */
 
static void tcp_read_wakeup(struct sock *sk)
{
	int tmp;
	struct device *dev = NULL;
	struct tcphdr *t1;
	struct sk_buff *buff;

	if (!sk->ack_backlog) 
		return;

	/*
	 * If we're closed, don't send an ack, or we'll get a RST
	 * from the closed destination.
	 */
	if ((sk->state == TCP_CLOSE) || (sk->state == TCP_TIME_WAIT))
		return; 

	/*
	 * FIXME: we need to put code here to prevent this routine from
	 * being called.  Being called once in a while is ok, so only check
	 * if this is the second time in a row.
 	 */

	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
	if (buff == NULL) 
	{
		/* Try again real soon. */
		reset_xmit_timer(sk, TIME_WRITE, HZ);
		return;
 	}

	buff->len = sizeof(struct tcphdr);
	buff->sk = sk;
	buff->localroute = sk->localroute;
	
	/*
	 *	Put in the IP header and routing stuff. 
	 */

	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
			       IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (tmp < 0) 
	{
  		buff->free = 1;
		sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
		return;
	}

	buff->len += tmp;
	t1 =(struct tcphdr *)(buff->data +tmp);

	memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
	t1->seq = htonl(sk->sent_seq);
	t1->ack = 1;
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->syn = 0;
	t1->psh = 0;
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	sk->window = tcp_select_window(sk);
	t1->window = ntohs(sk->window);
	t1->ack_seq = ntohl(sk->acked_seq);
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}


/*
 * 	FIXME:
 * 	This routine frees used buffers.
 * 	It should consider sending an ACK to let the
 * 	other end know we now have a bigger window.
 */

static void cleanup_rbuf(struct sock *sk)
{
	unsigned long flags;
	unsigned long left;
	struct sk_buff *skb;
	unsigned long rspace;

	if(sk->debug)
	  	printk("cleaning rbuf for sk=%p\n", sk);
  
	save_flags(flags);
	cli();
  
	left = sk->prot->rspace(sk);
 
	/*
	 *	We have to loop through all the buffer headers,
	 *	and try to free up all the space we can.
	 */

	while((skb=skb_peek(&sk->receive_queue)) != NULL) 
	{
		if (!skb->used || skb->users) 
			break;
		skb_unlink(skb);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
	}

	restore_flags(flags);

	/*
	 *	FIXME:
	 *	At this point we should send an ack if the difference
	 *	in the window, and the amount of space is bigger than
	 *	TCP_WINDOW_DIFF.
	 */

	if(sk->debug)
		printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
  					    left);
	if ((rspace=sk->prot->rspace(sk)) != left) 
	{
		/*
		 * This area has caused the most trouble.  The current strategy
		 * is to simply do nothing if the other end has room to send at
		 * least 3 full packets, because the ack from those will auto-
		 * matically update the window.  If the other end doesn't think
		 * we have much space left, but we have room for at least 1 more
		 * complete packet than it thinks we do, we will send an ack
		 * immediately.  Otherwise we will wait up to .5 seconds in case
		 * the user reads some more.
		 */
		sk->ack_backlog++;
	/*
	 * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
	 * if the other end is offering a window smaller than the agreed on MSS
	 * (called sk->mtu here).  In theory there's no connection between send
	 * and receive, and so no reason to think that they're going to send
	 * small packets.  For the moment I'm using the hack of reducing the mss
	 * only on the send side, so I'm putting mtu here.
	 */

		if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
		{
			/* Send an ack right now. */
			tcp_read_wakeup(sk);
		} 
		else 
		{
			/* Force it to send an ack soon. */
			int was_active = del_timer(&sk->retransmit_timer);
			if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
			{
				reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
			} 
			else
				add_timer(&sk->retransmit_timer);
		}
	}
} 


/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this, no blocking and very strange errors 8)
 */
 
static int tcp_read_urg(struct sock * sk, int nonblock,
	     unsigned char *to, int len, unsigned flags)
{
	/*
	 *	No URG data to read
	 */
	if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
		return -EINVAL;	/* Yes this is right ! */
		
	if (sk->err) 
	{
		int tmp = -sk->err;
		sk->err = 0;
		return tmp;
	}

	if (sk->state == TCP_CLOSE || sk->done) 
	{
		if (!sk->done) {
			sk->done = 1;
			return 0;
		}
		return -ENOTCONN;
	}

	if (sk->shutdown & RCV_SHUTDOWN) 
	{
		sk->done = 1;
		return 0;
	}
	sk->inuse = 1;
	if (sk->urg_data & URG_VALID) 
	{
		char c = sk->urg_data;
		if (!(flags & MSG_PEEK))
			sk->urg_data = URG_READ;
		put_fs_byte(c, to);
		release_sock(sk);
		return 1;
	}
	release_sock(sk);
	
	/*
	 * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
	 * Mike <pall@rz.uni-karlsruhe.de>
	 */
	return -EAGAIN;
}


/*
 *	This routine copies from a sock struct into the user buffer. 
 */
 
static int tcp_read(struct sock *sk, unsigned char *to,
	int len, int nonblock, unsigned flags)
{
	struct wait_queue wait = { current, NULL };
	int copied = 0;
	unsigned long peek_seq;
	volatile unsigned long *seq;	/* So gcc doesn't overoptimise */
	unsigned long used;

	/* 
	 *	This error should be checked. 
	 */
	 
	if (sk->state == TCP_LISTEN)
		return -ENOTCONN;

	/*
	 *	Urgent data needs to be handled specially. 
	 */
	 
	if (flags & MSG_OOB)
		return tcp_read_urg(sk, nonblock, to, len, flags);

	/*
	 *	Copying sequence to update. This is volatile to handle
	 *	the multi-reader case neatly (memcpy_to/fromfs might be 
	 *	inline and thus not flush cached variables otherwise).
	 */
	 
	peek_seq = sk->copied_seq;
	seq = &sk->copied_seq;
	if (flags & MSG_PEEK)
		seq = &peek_seq;

	add_wait_queue(sk->sleep, &wait);
	sk->inuse = 1;
	while (len > 0) 
	{
		struct sk_buff * skb;
		unsigned long offset;
	
		/*
		 * Are we at urgent data? Stop if we have read anything.
		 */
		 
		if (copied && sk->urg_data && sk->urg_seq == *seq)
			break;

		/*
		 *	Next get a buffer.
		 */
		 
		current->state = TASK_INTERRUPTIBLE;

		skb = skb_peek(&sk->receive_queue);
		do 
		{
			if (!skb)
				break;
			if (before(*seq, skb->h.th->seq))
				break;
			offset = *seq - skb->h.th->seq;
			if (skb->h.th->syn)
				offset--;
			if (offset < skb->len)
				goto found_ok_skb;
			if (skb->h.th->fin)
				goto found_fin_ok;
			if (!(flags & MSG_PEEK))
				skb->used = 1;
			skb = skb->next;
		}
		while (skb != (struct sk_buff *)&sk->receive_queue);

		if (copied)
			break;

		if (sk->err) 
		{
			copied = -sk->err;
			sk->err = 0;
			break;
		}

		if (sk->state == TCP_CLOSE) 
		{
			if (!sk->done) 
			{
				sk->done = 1;
				break;
			}
			copied = -ENOTCONN;
			break;
		}

		if (sk->shutdown & RCV_SHUTDOWN) 
		{
			sk->done = 1;
			break;
		}
			
		if (nonblock) 
		{
			copied = -EAGAIN;
			break;
		}

		cleanup_rbuf(sk);
		release_sock(sk);
		sk->socket->flags |= SO_WAITDATA;
		schedule();
		sk->socket->flags &= ~SO_WAITDATA;
		sk->inuse = 1;

		if (current->signal & ~current->blocked) 
		{
			copied = -ERESTARTSYS;
			break;
		}
		continue;

	found_ok_skb:
		/*
		 *	Lock the buffer. We can be fairly relaxed as
		 *	an interrupt will never steal a buffer we are 
		 *	using unless I've missed something serious in
		 *	tcp_data.
		 */
		
		skb->users++;
		
		/*
		 *	Ok so how much can we use ? 
		 */
		 
		used = skb->len - offset;
		if (len < used)
			used = len;
		/*
		 *	Do we have urgent data here? 
		 */
		
		if (sk->urg_data) 
		{
			unsigned long urg_offset = sk->urg_seq - *seq;
			if (urg_offset < used) 
			{
				if (!urg_offset) 
				{
					if (!sk->urginline) 
					{
						++*seq;
						offset++;
						used--;
					}
				}
				else
					used = urg_offset;
			}
		}
		
		/*
		 *	Copy it - We _MUST_ update *seq first so that we
		 *	don't ever double read when we have dual readers
		 */
		 
		*seq += used;

		/*
		 *	This memcpy_tofs can sleep. If it sleeps and we
		 *	do a second read it relies on the skb->users to avoid
		 *	a crash when cleanup_rbuf() gets called.
		 */
		 
		memcpy_tofs(to,((unsigned char *)skb->h.th) +
			skb->h.th->doff*4 + offset, used);
		copied += used;
		len -= used;
		to += used;
		
		/*
		 *	We now will not sleep again until we are finished
		 *	with skb. Sorry if you are doing the SMP port
		 *	but you'll just have to fix it neatly ;)
		 */
		 
		skb->users --;
		
		if (after(sk->copied_seq,sk->urg_seq))
			sk->urg_data = 0;
		if (used + offset < skb->len)
			continue;
		
		/*
		 *	Process the FIN.
		 */

		if (skb->h.th->fin)
			goto found_fin_ok;
		if (flags & MSG_PEEK)
			continue;
		skb->used = 1;
		continue;

	found_fin_ok:
		++*seq;
		if (flags & MSG_PEEK)
			break;
			
		/*
		 *	All is done
		 */
		 
		skb->used = 1;
		sk->shutdown |= RCV_SHUTDOWN;
		break;

	}
	remove_wait_queue(sk->sleep, &wait);
	current->state = TASK_RUNNING;

	/* Clean up data we have read: This will do ACK frames */
	cleanup_rbuf(sk);
	release_sock(sk);
	return copied;
}

/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some 
 *	states. A shutdown() may have already sent the FIN, or we may be
 *	closed.
 */
 
static int tcp_close_state(struct sock *sk, int dead)
{
	int ns=TCP_CLOSE;
	int send_fin=0;
	switch(sk->state)
	{
		case TCP_SYN_SENT:	/* No SYN back, no FIN needed */
			break;
		case TCP_SYN_RECV:
		case TCP_ESTABLISHED:	/* Closedown begin */
			ns=TCP_FIN_WAIT1;
			send_fin=1;
			break;
		case TCP_FIN_WAIT1:	/* Already closing, or FIN sent: no change */
		case TCP_FIN_WAIT2:
		case TCP_CLOSING:
			ns=sk->state;
			break;
		case TCP_CLOSE:
		case TCP_LISTEN:
			break;
		case TCP_CLOSE_WAIT:	/* They have FIN'd us. We send our FIN and
					   wait only for the ACK */
			ns=TCP_LAST_ACK;
			send_fin=1;
	}
	
	tcp_set_state(sk,ns);
		
	/*
	 *	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 */
	if(dead && ns==TCP_FIN_WAIT2)
	{
		int timer_active=del_timer(&sk->timer);
		if(timer_active)
			add_timer(&sk->timer);
		else
			reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
	}
	
	return send_fin;
}

/*
 *	Send a fin.
 */

static void tcp_send_fin(struct sock *sk)
{
	struct proto *prot =(struct proto *)sk->prot;
	struct tcphdr *th =(struct tcphdr *)&sk->dummy_th;
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct device *dev=NULL;
	int tmp;
		
	release_sock(sk); /* in case the malloc sleeps. */
	
	buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
	sk->inuse = 1;

	if (buff == NULL)
	{
		/* This is a disaster if it occurs */
		printk("tcp_send_fin: Impossible malloc failure");
		return;
	}

	/*
	 *	Administrivia
	 */
	 
	buff->sk = sk;
	buff->len = sizeof(*t1);
	buff->localroute = sk->localroute;
	t1 =(struct tcphdr *) buff->data;

	/*
	 *	Put in the IP header and routing stuff. 
	 */

	tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
			   IPPROTO_TCP, sk->opt,
			   sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
	if (tmp < 0) 
	{
		int t;
  		/*
  		 *	Finish anyway, treat this as a send that got lost. 
  		 *	(Not good).
  		 */
  		 
	  	buff->free = 1;
		prot->wfree(sk,buff->mem_addr, buff->mem_len);
		sk->write_seq++;
		t=del_timer(&sk->timer);
		if(t)
			add_timer(&sk->timer);
		else
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
		return;
	}
	
	/*
	 *	We ought to check if the end of the queue is a buffer and
	 *	if so simply add the fin to that buffer, not send it ahead.
	 */

	t1 =(struct tcphdr *)((char *)t1 +tmp);
	buff->len += tmp;
	buff->dev = dev;
	memcpy(t1, th, sizeof(*t1));
	t1->seq = ntohl(sk->write_seq);
	sk->write_seq++;
	buff->h.seq = sk->write_seq;
	t1->ack = 1;
	t1->ack_seq = ntohl(sk->acked_seq);
	t1->window = ntohs(sk->window=tcp_select_window(sk));
	t1->fin = 1;
	t1->rst = 0;
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);

	/*
	 * If there is data in the write queue, the fin must be appended to
	 * the write queue.
 	 */
 	
 	if (skb_peek(&sk->write_queue) != NULL) 
 	{
  		buff->free = 0;
		if (buff->next != NULL) 
		{
			printk("tcp_send_fin: next != NULL\n");
			skb_unlink(buff);
		}
		skb_queue_tail(&sk->write_queue, buff);
  	} 
  	else 
  	{
        	sk->sent_seq = sk->write_seq;
		sk->prot->queue_xmit(sk, dev, buff, 0);
		reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	}
}

/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or set sk->dead=1.
 */

void tcp_shutdown(struct sock *sk, int how)
{
	/*
	 *	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */

	if (!(how & SEND_SHUTDOWN)) 
		return;
	 
	/*
	 *	If we've already sent a FIN, or it's a closed state
	 */
	 
	if (sk->state == TCP_FIN_WAIT1 ||
	    sk->state == TCP_FIN_WAIT2 ||
	    sk->state == TCP_CLOSING ||
	    sk->state == TCP_LAST_ACK ||
	    sk->state == TCP_TIME_WAIT || 
	    sk->state == TCP_CLOSE ||
	    sk->state == TCP_LISTEN
	  )
	{
		return;
	}
	sk->inuse = 1;

	/*
	 * flag that the sender has shutdown
	 */

	sk->shutdown |= SEND_SHUTDOWN;

	/*
	 *  Clear out any half completed packets. 
	 */

	if (sk->partial)
		tcp_send_partial(sk);
		
	/*
	 *	FIN if needed
	 */
	 
	if(tcp_close_state(sk,0))
		tcp_send_fin(sk);
		
	release_sock(sk);
}


static int
tcp_recvfrom(struct sock *sk, unsigned char *to,
	     int to_len, int nonblock, unsigned flags,
	     struct sockaddr_in *addr, int *addr_len)
{
	int result;
  
	/* 
	 *	Have to check these first unlike the old code. If 
	 *	we check them after we lose data on an error
	 *	which is wrong 
	 */

	if(addr_len)
		*addr_len = sizeof(*addr);
	result=tcp_read(sk, to, to_len, nonblock, flags);

	if (result < 0) 
		return(result);
  
  	if(addr)
  	{
		addr->sin_family = AF_INET;
 		addr->sin_port = sk->dummy_th.dest;
		addr->sin_addr.s_addr = sk->daddr;
	}
	return(result);
}


/*
 *	This routine will send an RST to the other tcp. 
 */
 
static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	int tmp;
	struct device *ndev=NULL;

	/*
	 *	Cannot reset a reset (Think about it).
	 */
	 
	if(th->rst)
		return;
  
	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL) 
	  	return;

	buff->len = sizeof(*t1);
	buff->sk = NULL;
	buff->dev = dev;
	buff->localroute = 0;

	t1 =(struct tcphdr *) buff->data;

	/*
	 *	Put in the IP header and routing stuff. 
	 */

	tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
			   sizeof(struct tcphdr),tos,ttl);
	if (tmp < 0) 
	{
  		buff->free = 1;
		prot->wfree(NULL, buff->mem_addr, buff->mem_len);
		return;
	}

	t1 =(struct tcphdr *)((char *)t1 +tmp);
	buff->len += tmp;
	memcpy(t1, th, sizeof(*t1));

	/*
	 *	Swap the send and the receive. 
	 */

	t1->dest = th->source;
	t1->source = th->dest;
	t1->rst = 1;  
	t1->window = 0;
  
	if(th->ack)
	{
		t1->ack = 0;
	  	t1->seq = th->ack_seq;
	  	t1->ack_seq = 0;
	}
	else
	{
	  	t1->ack = 1;
	  	if(!th->syn)
  			t1->ack_seq=htonl(th->seq);
  		else
  			t1->ack_seq=htonl(th->seq+1);
  		t1->seq=0;
	}

	t1->syn = 0;
	t1->urg = 0;
	t1->fin = 0;
	t1->psh = 0;
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
	prot->queue_xmit(NULL, ndev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}


/*
 *	Look for tcp options. Parses everything but only knows about MSS.
 *      This routine is always called with the packet containing the SYN.
 *      However it may also be called with the ack to the SYN.  So you
 *      can't assume this is always the SYN.  It's always called after
 *      we have set up sk->mtu to our own MTU.
 *
 *	We need at minimum to add PAWS support here. Possibly large windows
 *	as Linux gets deployed on 100Mb/sec networks.
 */
 
static void tcp_options(struct sock *sk, struct tcphdr *th)
{
	unsigned char *ptr;
	int length=(th->doff*4)-sizeof(struct tcphdr);
	int mss_seen = 0;
    
	ptr = (unsigned char *)(th + 1);
  
	while(length>0)
	{
	  	int opcode=*ptr++;
	  	int opsize=*ptr++;
	  	switch(opcode)
	  	{
	  		case TCPOPT_EOL:
	  			return;
	  		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
	  			length--;
	  			ptr--;		/* the opsize=*ptr++ above was a mistake */
	  			continue;
	  		
	  		default:
	  			if(opsize<=2)	/* Avoid silly options looping forever */
	  				return;
	  			switch(opcode)
	  			{
	  				case TCPOPT_MSS:
	  					if(opsize==4 && th->syn)
	  					{
	  						sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
							mss_seen = 1;
	  					}
	  					break;
		  				/* Add other options here as people feel the urge to implement stuff like large windows */
	  			}
	  			ptr+=opsize-2;
	  			length-=opsize;
	  	}
	}
	if (th->syn) 
	{
		if (! mss_seen)
		      sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
	}
#ifdef CONFIG_INET_PCTCP
	sk->mss = min(sk->max_window >> 1, sk->mtu);
#else    
	sk->mss = min(sk->max_window, sk->mtu);
#endif  
}

static inline unsigned long default_mask(unsigned long dst)
{
	dst = ntohl(dst);
	if (IN_CLASSA(dst))
		return htonl(IN_CLASSA_NET);
	if (IN_CLASSB(dst))
		return htonl(IN_CLASSB_NET);
	return htonl(IN_CLASSC_NET);
}

/*
 *	Default sequence number picking algorithm.
 *	As close as possible to RFC 793, which
 *	suggests using a 250kHz clock.
 *	Further reading shows this assumes 2MB/s networks.
 *	For 10MB/s ethernet, a 1MHz clock is appropriate.
 *	That's funny, Linux has one built in!  Use it!
 */

extern inline unsigned long tcp_init_seq(void)
{
	struct timeval tv;
	do_gettimeofday(&tv);
	return tv.tv_usec+tv.tv_sec*1000000;
}

/*
 *	This routine handles a connection request.
 *	It should make sure we haven't already responded.
 *	Because of the way BSD works, we have to send a syn/ack now.
 *	This also means it will be harder to close a socket which is
 *	listening.
 */
 
static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
		 unsigned long daddr, unsigned long saddr,
		 struct options *opt, struct device *dev, unsigned long seq)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sock *newsk;
	struct tcphdr *th;
	struct device *ndev=NULL;
	int tmp;
	struct rtable *rt;
  
	th = skb->h.th;

	/* If the socket is dead, don't accept the connection. */
	if (!sk->dead) 
	{
  		sk->data_ready(sk,0);
	}
	else 
	{
		if(sk->debug)
			printk("Reset on %p: Connect on dead socket.\n",sk);
		tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * Make sure we can accept more.  This will prevent a
	 * flurry of syns from eating up all our memory.
	 */

	if (sk->ack_backlog >= sk->max_ack_backlog) 
	{
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * We need to build a new sock struct.
	 * It is sort of bad to have a socket without an inode attached
	 * to it, but the wake_up's will just wake up the listening socket,
	 * and if the listening socket is destroyed before this is taken
	 * off of the queue, this will take care of it.
	 */

	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
	if (newsk == NULL) 
	{
		/* just ignore the syn.  It will get retransmitted. */
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	memcpy(newsk, sk, sizeof(*newsk));
	skb_queue_head_init(&newsk->write_queue);
	skb_queue_head_init(&newsk->receive_queue);
	newsk->send_head = NULL;
	newsk->send_tail = NULL;
	skb_queue_head_init(&newsk->back_log);
	newsk->rtt = 0;		/*TCP_CONNECT_TIME<<3*/
	newsk->rto = TCP_TIMEOUT_INIT;
	newsk->mdev = 0;
	newsk->max_window = 0;
	newsk->cong_window = 1;
	newsk->cong_count = 0;
	newsk->ssthresh = 0;
	newsk->backoff = 0;
	newsk->blog = 0;
	newsk->intr = 0;
	newsk->proc = 0;
	newsk->done = 0;
	newsk->partial = NULL;
	newsk->pair = NULL;
	newsk->wmem_alloc = 0;
	newsk->rmem_alloc = 0;
	newsk->localroute = sk->localroute;

	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;

	newsk->err = 0;
	newsk->shutdown = 0;
	newsk->ack_backlog = 0;
	newsk->acked_seq = skb->h.th->seq+1;
	newsk->copied_seq = skb->h.th->seq+1;
	newsk->fin_seq = skb->h.th->seq;
	newsk->state = TCP_SYN_RECV;
	newsk->timeout = 0;
	newsk->ip_xmit_timeout = 0;
	newsk->write_seq = seq; 
	newsk->window_seq = newsk->write_seq;
	newsk->rcv_ack_seq = newsk->write_seq;
	newsk->urg_data = 0;
	newsk->retransmits = 0;
	newsk->linger=0;
	newsk->destroy = 0;
	init_timer(&newsk->timer);
	newsk->timer.data = (unsigned long)newsk;
	newsk->timer.function = &net_timer;
	init_timer(&newsk->retransmit_timer);
	newsk->retransmit_timer.data = (unsigned long)newsk;
	newsk->retransmit_timer.function=&retransmit_timer;
	newsk->dummy_th.source = skb->h.th->dest;
	newsk->dummy_th.dest = skb->h.th->source;
	
	/*
	 *	Swap these two, they are from our point of view. 
	 */
	 
	newsk->daddr = saddr;
	newsk->saddr = daddr;

	put_sock(newsk->num,newsk);
	newsk->dummy_th.res1 = 0;
	newsk->dummy_th.doff = 6;
	newsk->dummy_th.fin = 0;
	newsk->dummy_th.syn = 0;
	newsk->dummy_th.rst = 0;	
	newsk->dummy_th.psh = 0;
	newsk->dummy_th.ack = 0;
	newsk->dummy_th.urg = 0;
	newsk->dummy_th.res2 = 0;
	newsk->acked_seq = skb->h.th->seq + 1;
	newsk->copied_seq = skb->h.th->seq + 1;
	newsk->socket = NULL;

	/*
	 *	Grab the ttl and tos values and use them 
	 */

	newsk->ip_ttl=sk->ip_ttl;
	newsk->ip_tos=skb->ip_hdr->tos;

	/*
	 *	Use 512 or whatever user asked for 
	 */

	/*
	 * 	Note use of sk->user_mss, since user has no direct access to newsk 
	 */

	rt=ip_rt_route(saddr, NULL,NULL);
	
	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		newsk->window_clamp = rt->rt_window;
	else
		newsk->window_clamp = 0;
		
	if (sk->user_mss)
		newsk->mtu = sk->user_mss;
	else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
		newsk->mtu = rt->rt_mss - HEADER_SIZE;
	else 
	{
#ifdef CONFIG_INET_SNARL	/* Sub Nets Are Local */
		if ((saddr ^ daddr) & default_mask(saddr))
#else
		if ((saddr ^ daddr) & dev->pa_mask)
#endif
			newsk->mtu = 576 - HEADER_SIZE;
		else
			newsk->mtu = MAX_WINDOW;
	}

	/*
	 *	But not bigger than device MTU 
	 */

	newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);

	/*
	 *	This will min with what arrived in the packet 
	 */

	tcp_options(newsk,skb->h.th);

	buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL) 
	{
		sk->err = ENOMEM;
		newsk->dead = 1;
		newsk->state = TCP_CLOSE;
		/* And this will destroy it */
		release_sock(newsk);
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}
  
	buff->len = sizeof(struct tcphdr)+4;
	buff->sk = newsk;
	buff->localroute = newsk->localroute;

	t1 =(struct tcphdr *) buff->data;

	/*
	 *	Put in the IP header and routing stuff. 
	 */

	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
			       IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);

	/*
	 *	Something went wrong. 
	 */

	if (tmp < 0) 
	{
		sk->err = tmp;
		buff->free = 1;
		kfree_skb(buff,FREE_WRITE);
		newsk->dead = 1;
		newsk->state = TCP_CLOSE;
		release_sock(newsk);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	buff->len += tmp;
	t1 =(struct tcphdr *)((char *)t1 +tmp);
  
	memcpy(t1, skb->h.th, sizeof(*t1));
	buff->h.seq = newsk->write_seq;
	/*
	 *	Swap the send and the receive. 
	 */
	t1->dest = skb->h.th->source;
	t1->source = newsk->dummy_th.source;
	t1->seq = ntohl(newsk->write_seq++);
	t1->ack = 1;
	newsk->window = tcp_select_window(newsk);
	newsk->sent_seq = newsk->write_seq;
	t1->window = ntohs(newsk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->ack_seq = ntohl(skb->h.th->seq+1);
	t1->doff = sizeof(*t1)/4+1;
	ptr =(unsigned char *)(t1+1);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] =(newsk->mtu) & 0xff;

	tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
	newsk->prot->queue_xmit(newsk, ndev, buff, 0);
	reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
	skb->sk = newsk;

	/*
	 *	Charge the sock_buff to newsk. 
	 */
	 
	sk->rmem_alloc -= skb->mem_len;
	newsk->rmem_alloc += skb->mem_len;
	
	skb_queue_tail(&sk->receive_queue,skb);
	sk->ack_backlog++;
	release_sock(newsk);
	tcp_statistics.TcpOutSegs++;
}


static void tcp_close(struct sock *sk, int timeout)
{
	/*
	 * We need to grab some memory, and put together a FIN,	
	 * and then put it into the queue to be sent.
	 */
	
	sk->inuse = 1;
	
	if(sk->state == TCP_LISTEN)
	{
		/* Special case */
		tcp_set_state(sk, TCP_CLOSE);
		tcp_close_pending(sk);
		release_sock(sk);
		return;
	}
	
	sk->keepopen = 1;
	sk->shutdown = SHUTDOWN_MASK;

	if (!sk->dead) 
	  	sk->state_change(sk);

	if (timeout == 0) 
	{
		struct sk_buff *skb;
		
		/*
		 *  We need to flush the recv. buffs.  We do this only on the
		 *  descriptor close, not protocol-sourced closes, because the
		 *  reader process may not have drained the data yet!
		 */
		 
		while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
			kfree_skb(skb, FREE_READ);
		/*
		 *	Get rid off any half-completed packets. 
		 */

		if (sk->partial) 
			tcp_send_partial(sk);
	}

		
	/*
	 *	Timeout is not the same thing - however the code likes
	 *	to send both the same way (sigh).
	 */
	 
	if(timeout)
	{
		tcp_set_state(sk, TCP_CLOSE);	/* Dead */
	}
	else
	{
		if(tcp_close_state(sk,1)==1)
		{
			tcp_send_fin(sk);
		}
	}
	release_sock(sk);
}


/*
 * 	This routine takes stuff off of the write queue,
 *	and puts it in the xmit queue. This happens as incoming acks
 *	open up the remote window for us.
 */
 
static void tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	/*
	 *	The bytes will have to remain here. In time closedown will
	 *	empty the write queue and all will be happy 
	 */

	if(sk->zapped)
		return;

	/*
	 *	Anything on the transmit queue that fits the window can
	 *	be added providing we are not
	 *
	 *	a) retransmitting (Nagle's rule)
	 *	b) exceeding our congestion window.
	 */
	 
	while((skb = skb_peek(&sk->write_queue)) != NULL &&
		before(skb->h.seq, sk->window_seq + 1) &&
		(sk->retransmits == 0 ||
		 sk->ip_xmit_timeout != TIME_WRITE ||
		 before(skb->h.seq, sk->rcv_ack_seq + 1))
		&& sk->packets_out < sk->cong_window) 
	{
		IS_SKB(skb);
		skb_unlink(skb);
		
		/*
		 *	See if we really need to send the packet. 
		 */
		 
		if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
		{
			/*
			 *	This is acked data. We can discard it. This 
			 *	cannot currently occur.
			 */
			 
			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead) 
				sk->write_space(sk);
		} 
		else
		{
			struct tcphdr *th;
			struct iphdr *iph;
			int size;
/*
 * put in the ack seq and window at this point rather than earlier,
 * in order to keep them monotonic.  We really want to avoid taking
 * back window allocations.  That's legal, but RFC1122 says it's frowned on.
 * Ack and window will in general have changed since this packet was put
 * on the write queue.
 */
			iph = (struct iphdr *)(skb->data +
					       skb->dev->hard_header_len);
			th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);
			
			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));

			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);

			sk->sent_seq = skb->h.seq;
			
			/*
			 *	IP manages our queue for some crazy reason
			 */
			 
			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
			
			/*
			 *	Again we slide the timer wrongly
			 */
			 
			reset_xmit_timer(sk, TIME_WRITE, sk->rto);
		}
	}
}


/*
 *	This routine deals with incoming acks, but not outgoing ones.
 */

extern __inline__ int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
{
	unsigned long ack;
	int flag = 0;

	/* 
	 * 1 - there was data in packet as well as ack or new data is sent or 
	 *     in shutdown state
	 * 2 - data from retransmit queue was acked and removed
	 * 4 - window shrunk or data from retransmit queue was acked and removed
	 */

	if(sk->zapped)
		return(1);	/* Dead, cant ack any more so why bother */

	/*
	 *	Have we discovered a larger window
	 */
	 
	ack = ntohl(th->ack_seq);

	if (ntohs(th->window) > sk->max_window) 
	{
  		sk->max_window = ntohs(th->window);
#ifdef CONFIG_INET_PCTCP
		/* Hack because we don't send partial packets to non SWS
		   handling hosts */
		sk->mss = min(sk->max_window>>1, sk->mtu);
#else
		sk->mss = min(sk->max_window, sk->mtu);
#endif	
	}

	/*
	 *	We have dropped back to keepalive timeouts. Thus we have
	 *	no retransmits pending.
	 */
	 
	if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN)
	  	sk->retransmits = 0;

	/*
	 *	If the ack is newer than sent or older than previous acks
	 *	then we can probably ignore it.
	 */
	 
	if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
	{
		if(sk->debug)
			printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
			
		/*
		 *	Keepalive processing.
		 */
		 
		if (after(ack, sk->sent_seq)) 
		{
			return(0);
		}
		
		/*
		 *	Restart the keepalive timer.
		 */
		 
		if (sk->keepopen) 
		{
			if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
				reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
		}
		return(1);
	}

	/*
	 *	If there is data set flag 1
	 */
	 
	if (len != th->doff*4) 
		flag |= 1;

	/*
	 *	See if our window has been shrunk. 
	 */

	if (after(sk->window_seq, ack+ntohs(th->window))) 
	{
		/*
		 * We may need to move packets from the send queue
		 * to the write queue, if the window has been shrunk on us.
		 * The RFC says you are not allowed to shrink your window
		 * like this, but if the other end does, you must be able
		 * to deal with it.
		 */
		struct sk_buff *skb;
		struct sk_buff *skb2;
		struct sk_buff *wskb = NULL;
  	
		skb2 = sk->send_head;
		sk->send_head = NULL;
		sk->send_tail = NULL;
	
		/*
		 *	This is an artifact of a flawed concept. We want one
		 *	queue and a smarter send routine when we send all.
		 */
	
		flag |= 4;	/* Window changed */
	
		sk->window_seq = ack + ntohs(th->window);
		cli();
		while (skb2 != NULL) 
		{
			skb = skb2;
			skb2 = skb->link3;
			skb->link3 = NULL;
			if (after(skb->h.seq, sk->window_seq)) 
			{
				if (sk->packets_out > 0) 
					sk->packets_out--;
				/* We may need to remove this from the dev send list. */
				if (skb->next != NULL) 
				{
					skb_unlink(skb);				
				}
				/* Now add it to the write_queue. */
				if (wskb == NULL)
					skb_queue_head(&sk->write_queue,skb);
				else
					skb_append(wskb,skb);
				wskb = skb;
			} 
			else 
			{
				if (sk->send_head == NULL) 
				{
					sk->send_head = skb;
					sk->send_tail = skb;
				}
				else
				{
					sk->send_tail->link3 = skb;
					sk->send_tail = skb;
				}
				skb->link3 = NULL;
			}
		}
		sti();
	}

	/*
	 *	Pipe has emptied
	 */
	 
	if (sk->send_tail == NULL || sk->send_head == NULL) 
	{
		sk->send_head = NULL;
		sk->send_tail = NULL;
		sk->packets_out= 0;
	}

	/*
	 *	Update the right hand window edge of the host
	 */
	 
	sk->window_seq = ack + ntohs(th->window);

	/*
	 *	We don't want too many packets out there. 
	 */
	 
	if (sk->ip_xmit_timeout == TIME_WRITE && 
		sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
	{
		/* 
		 * This is Jacobson's slow start and congestion avoidance. 
		 * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
		 * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
		 * counter and increment it once every cwnd times.  It's possible
		 * that this should be done only if sk->retransmits == 0.  I'm
		 * interpreting "new data is acked" as including data that has
		 * been retransmitted but is just now being acked.
		 */
		if (sk->cong_window < sk->ssthresh)  
			/* 
			 *	In "safe" area, increase
			 */
			sk->cong_window++;
		else 
		{
			/*
			 *	In dangerous area, increase slowly.  In theory this is
			 *  	sk->cong_window += 1 / sk->cong_window
			 */
			if (sk->cong_count >= sk->cong_window) 
			{
				sk->cong_window++;
				sk->cong_count = 0;
			}
			else 
				sk->cong_count++;
		}
	}

	/*
	 *	Remember the highest ack received.
	 */
	 
	sk->rcv_ack_seq = ack;

	/*
	 *	If this ack opens up a zero window, clear backoff.  It was
	 *	being used to time the probes, and is probably far higher than
	 *	it needs to be for normal retransmission.
	 */

	if (sk->ip_xmit_timeout == TIME_PROBE0) 
	{
		sk->retransmits = 0;	/* Our probe was answered */
		
		/*
		 *	Was it a usable window open ?
		 */
		 
  		if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
		    ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
		{
			sk->backoff = 0;
			
			/*
			 *	Recompute rto from rtt.  this eliminates any backoff.
			 */

			sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
			if (sk->rto > 120*HZ)
				sk->rto = 120*HZ;
			if (sk->rto < 20)	/* Was 1*HZ, then 1 - turns out we must allow about
						   .2 of a second because of BSD delayed acks - on a 100Mb/sec link
						   .2 of a second is going to need huge windows (SIGH) */
			sk->rto = 20;
		}
	}

	/* 
	 *	See if we can take anything off of the retransmit queue.
	 */
   
	while(sk->send_head != NULL) 
	{
		/* Check for a bug. */
		if (sk->send_head->link3 &&
		    after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
			printk("INET: tcp.c: *** bug send_list out of order.\n");
			
		/*
		 *	If our packet is before the ack sequence we can
		 *	discard it as it's confirmed to have arrived the other end.
		 */
		 
		if (before(sk->send_head->h.seq, ack+1)) 
		{
			struct sk_buff *oskb;	
			if (sk->retransmits) 
			{	
				/*
				 *	We were retransmitting.  don't count this in RTT est 
				 */
				flag |= 2;

				/*
				 * even though we've gotten an ack, we're still
				 * retransmitting as long as we're sending from
				 * the retransmit queue.  Keeping retransmits non-zero
				 * prevents us from getting new data interspersed with
				 * retransmissions.
				 */

				if (sk->send_head->link3)	/* Any more queued retransmits? */
					sk->retransmits = 1;
				else
					sk->retransmits = 0;
			}
  			/*
			 * Note that we only reset backoff and rto in the
			 * rtt recomputation code.  And that doesn't happen
			 * if there were retransmissions in effect.  So the
			 * first new packet after the retransmissions is
			 * sent with the backoff still in effect.  Not until
			 * we get an ack from a non-retransmitted packet do
			 * we reset the backoff and rto.  This allows us to deal
			 * with a situation where the network delay has increased
			 * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
			 */

			/*
			 *	We have one less packet out there. 
			 */
			 
			if (sk->packets_out > 0) 
				sk->packets_out --;
			/* 
			 *	Wake up the process, it can probably write more. 
			 */
			if (!sk->dead) 
				sk->write_space(sk);
			oskb = sk->send_head;

			if (!(flag&2)) 	/* Not retransmitting */
			{
				long m;
	
				/*
				 *	The following amusing code comes from Jacobson's
				 *	article in SIGCOMM '88.  Note that rtt and mdev
				 *	are scaled versions of rtt and mean deviation.
				 *	This is designed to be as fast as possible 
				 *	m stands for "measurement".
				 */
	
				m = jiffies - oskb->when;  /* RTT */
				if(m<=0)
					m=1;		/* IS THIS RIGHT FOR <0 ??? */
				m -= (sk->rtt >> 3);    /* m is now error in rtt est */
				sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
				if (m < 0)
					m = -m;		/* m is now abs(error) */
				m -= (sk->mdev >> 2);   /* similar update on mdev */
				sk->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
	
				/*
				 *	Now update timeout.  Note that this removes any backoff.
				 */
			 
				sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
				if (sk->rto > 120*HZ)
					sk->rto = 120*HZ;
				if (sk->rto < 20)	/* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
					sk->rto = 20;
				sk->backoff = 0;
			}
			flag |= (2|4);	/* 2 is really more like 'don't adjust the rtt 
			                   In this case as we just set it up */
			cli();
			oskb = sk->send_head;
			IS_SKB(oskb);
			sk->send_head = oskb->link3;
			if (sk->send_head == NULL) 
			{
				sk->send_tail = NULL;
			}

		/*
		 *	We may need to remove this from the dev send list. 
		 */

			if (oskb->next)
				skb_unlink(oskb);
			sti();
			kfree_skb(oskb, FREE_WRITE); /* write. */
			if (!sk->dead) 
				sk->write_space(sk);
		}
		else
		{
			break;
		}
	}

	/*
	 * XXX someone ought to look at this too.. at the moment, if skb_peek()
	 * returns non-NULL, we complete ignore the timer stuff in the else
	 * clause.  We ought to organize the code so that else clause can
	 * (should) be executed regardless, possibly moving the PROBE timer
	 * reset over.  The skb_peek() thing should only move stuff to the
	 * write queue, NOT also manage the timer functions.
	 */

	/*
	 * Maybe we can take some stuff off of the write queue,
	 * and put it onto the xmit queue.
	 */
	if (skb_peek(&sk->write_queue) != NULL) 
	{
		if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
		        (sk->retransmits == 0 || 
			 sk->ip_xmit_timeout != TIME_WRITE ||
			 before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
			&& sk->packets_out < sk->cong_window) 
		{
			/*
			 *	Add more data to the send queue.
			 */
			flag |= 1;
			tcp_write_xmit(sk);
		}
		else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 			sk->send_head == NULL &&
 			sk->ack_backlog == 0 &&
 			sk->state != TCP_TIME_WAIT) 
 		{
 			/*
 			 *	Data to queue but no room.
 			 */
 	        	reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
 		}		
	}
	else
	{
		/*
		 * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
		 * from TCP_CLOSE we don't do anything
		 *
		 * from anything else, if there is write data (or fin) pending,
		 * we use a TIME_WRITE timeout, else if keepalive we reset to
		 * a KEEPALIVE timeout, else we delete the timer.
		 *
		 * We do not set flag for nominal write data, otherwise we may
		 * force a state where we start to write itsy bitsy tidbits
		 * of data.
		 */

		switch(sk->state) {
		case TCP_TIME_WAIT:
			/*
			 * keep us in TIME_WAIT until we stop getting packets,
			 * reset the timeout.
			 */
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			break;
		case TCP_CLOSE:
			/*
			 * don't touch the timer.
			 */
			break;
		default:
			/*
			 * 	Must check send_head, write_queue, and ack_backlog
			 * 	to determine which timeout to use.
			 */
			if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
			} else if (sk->keepopen) {
				reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
			} else {
				del_timer(&sk->retransmit_timer);
				sk->ip_xmit_timeout = 0;
			}
			break;
		}
	}

	/*
	 *	We have nothing queued but space to send. Send any partial
	 *	packets immediately (end of Nagle rule application).
	 */
	 
	if (sk->packets_out == 0 && sk->partial != NULL &&
		skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
	{
		flag |= 1;
		tcp_send_partial(sk);
	}

	/*
	 * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
	 * we are now waiting for an acknowledge to our FIN.  The other end is
	 * already in TIME_WAIT.
	 *
	 * Move to TCP_CLOSE on success.
	 */

	if (sk->state == TCP_LAST_ACK) 
	{
		if (!sk->dead)
			sk->state_change(sk);
		if(sk->debug)
			printk("rcv_ack_seq: %lX==%lX, acked_seq: %lX==%lX\n",
				sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
		if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) 
		{
			flag |= 1;
			tcp_set_state(sk,TCP_CLOSE);
			sk->shutdown = SHUTDOWN_MASK;
		}
	}

	/*
	 *	Incoming ACK to a FIN we sent in the case of our initiating the close.
	 *
	 *	Move to FIN_WAIT2 to await a FIN from the other end. Set
	 *	SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
	 */

	if (sk->state == TCP_FIN_WAIT1) 
	{

		if (!sk->dead) 
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq) 
		{
			flag |= 1;
			sk->shutdown |= SEND_SHUTDOWN;
			tcp_set_state(sk, TCP_FIN_WAIT2);
		}
	}

	/*
	 *	Incoming ACK to a FIN we sent in the case of a simultaneous close.
	 *
	 *	Move to TIME_WAIT
	 */

	if (sk->state == TCP_CLOSING) 
	{

		if (!sk->dead) 
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq) 
		{
			flag |= 1;
			tcp_time_wait(sk);
		}
	}
	
	/*
	 *	Final ack of a three way shake 
	 */
	 
	if(sk->state==TCP_SYN_RECV)
	{
		tcp_set_state(sk, TCP_ESTABLISHED);
		tcp_options(sk,th);
		sk->dummy_th.dest=th->source;
		sk->copied_seq = sk->acked_seq;
		if(!sk->dead)
			sk->state_change(sk);
		if(sk->max_window==0)
		{
			sk->max_window=32;	/* Sanity check */
			sk->mss=min(sk->max_window,sk->mtu);
		}
	}
	
	/*
	 * I make no guarantees about the first clause in the following
	 * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
	 * what conditions "!flag" would be true.  However I think the rest
	 * of the conditions would prevent that from causing any
	 * unnecessary retransmission. 
	 *   Clearly if the first packet has expired it should be 
	 * retransmitted.  The other alternative, "flag&2 && retransmits", is
	 * harder to explain:  You have to look carefully at how and when the
	 * timer is set and with what timeout.  The most recent transmission always
	 * sets the timer.  So in general if the most recent thing has timed
	 * out, everything before it has as well.  So we want to go ahead and
	 * retransmit some more.  If we didn't explicitly test for this
	 * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
	 * would not be true.  If you look at the pattern of timing, you can
	 * show that rto is increased fast enough that the next packet would
	 * almost never be retransmitted immediately.  Then you'd end up
	 * waiting for a timeout to send each packet on the retransmission
	 * queue.  With my implementation of the Karn sampling algorithm,
	 * the timeout would double each time.  The net result is that it would
	 * take a hideous amount of time to recover from a single dropped packet.
	 * It's possible that there should also be a test for TIME_WRITE, but
	 * I think as long as "send_head != NULL" and "retransmit" is on, we've
	 * got to be in real retransmission mode.
	 *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
	 * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
	 * As long as no further losses occur, this seems reasonable.
	 */
	
	if (((!flag) || (flag&4)) && sk->send_head != NULL &&
	       (((flag&2) && sk->retransmits) ||
	       (sk->send_head->when + sk->rto < jiffies))) 
	{
		if(sk->send_head->when + sk->rto < jiffies)
			tcp_retransmit(sk,0);	
		else
		{
			tcp_do_retransmit(sk, 1);
			reset_xmit_timer(sk, TIME_WRITE, sk->rto);
		}
	}

	return(1);
}


/*
 * 	Process the FIN bit. This now behaves as it is supposed to work
 *	and the FIN takes effect when it is validly part of sequence
 *	space. Not before when we get holes.
 *
 *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
 *	TIME-WAIT)
 *
 *	If we are in FINWAIT-1, a received FIN indicates simultaneous
 *	close and we go into CLOSING (and later onto TIME-WAIT)
 *
 *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 *
 */
 
static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
	sk->fin_seq = th->seq + skb->len + th->syn + th->fin;

	if (!sk->dead) 
	{
		sk->state_change(sk);
		sock_wake_async(sk->socket, 1);
	}

	switch(sk->state) 
	{
		case TCP_SYN_RECV:
		case TCP_SYN_SENT:
		case TCP_ESTABLISHED:
			/*
			 * move to CLOSE_WAIT, tcp_data() already handled
			 * sending the ack.
			 */
			tcp_set_state(sk,TCP_CLOSE_WAIT);
			if (th->rst)
				sk->shutdown = SHUTDOWN_MASK;
			break;

		case TCP_CLOSE_WAIT:
		case TCP_CLOSING:
			/*
			 * received a retransmission of the FIN, do
			 * nothing.
			 */
			break;
		case TCP_TIME_WAIT:
			/*
			 * received a retransmission of the FIN,
			 * restart the TIME_WAIT timer.
			 */
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			return(0);
		case TCP_FIN_WAIT1:
			/*
			 * This case occurs when a simultaneous close
			 * happens, we must ack the received FIN and
			 * enter the CLOSING state.
			 *
			 * This causes a WRITE timeout, which will either
			 * move on to TIME_WAIT when we timeout, or resend
			 * the FIN properly (maybe we get rid of that annoying
			 * FIN lost hang). The TIME_WRITE code is already correct
			 * for handling this timeout.
			 */

			if(sk->ip_xmit_timeout != TIME_WRITE)
				reset_xmit_timer(sk, TIME_WRITE, sk->rto);
			tcp_set_state(sk,TCP_CLOSING);
			break;
		case TCP_FIN_WAIT2:
			/*
			 * received a FIN -- send ACK and enter TIME_WAIT
			 */
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			sk->shutdown|=SHUTDOWN_MASK;
			tcp_set_state(sk,TCP_TIME_WAIT);
			break;
		case TCP_CLOSE:
			/*
			 * already in CLOSE
			 */
			break;
		default:
			tcp_set_state(sk,TCP_LAST_ACK);
	
			/* Start the timers. */
			reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			return(0);
	}

	return(0);
}


/*
 *	This routine handles the data.  If there is room in the buffer,
 *	it will be have already been moved into it.  If there is no
 *	room, then we will just have to discard the packet.
 */

extern __inline__ int tcp_data(struct sk_buff *skb, struct sock *sk, 
	 unsigned long saddr, unsigned short len)
{
	struct sk_buff *skb1, *skb2;
	struct tcphdr *th;
	int dup_dumped=0;
	unsigned long new_seq;
	unsigned long shut_seq;

	th = skb->h.th;
	skb->len = len -(th->doff*4);

	/*
	 *	The bytes in the receive read/assembly queue has increased. Needed for the
	 *	low memory discard algorithm 
	 */
	   
	sk->bytes_rcv += skb->len;
	
	if (skb->len == 0 && !th->fin) 
	{
		/* 
		 *	Don't want to keep passing ack's back and forth. 
		 *	(someone sent us dataless, boring frame)
		 */
		if (!th->ack)
			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
		kfree_skb(skb, FREE_READ);
		return(0);
	}
	
	/*
	 *	We no longer have anyone receiving data on this connection.
	 */

#ifndef TCP_DONT_RST_SHUTDOWN		 

	if(sk->shutdown & RCV_SHUTDOWN)
	{
		/*
		 *	FIXME: BSD has some magic to avoid sending resets to
		 *	broken 4.2 BSD keepalives. Much to my surprise a few non
		 *	BSD stacks still have broken keepalives so we want to
		 *	cope with it.
		 */

		if(skb->len)	/* We don't care if it's just an ack or
				   a keepalive/window probe */
		{
			new_seq= th->seq + skb->len + th->syn;	/* Right edge of _data_ part of frame */
			
			/* Do this the way 4.4BSD treats it. Not what I'd
			   regard as the meaning of the spec but it's what BSD
			   does and clearly they know everything 8) */

			/*
			 *	This is valid because of two things
			 *
			 *	a) The way tcp_data behaves at the bottom.
			 *	b) A fin takes effect when read not when received.
			 */
			 
			shut_seq=sk->acked_seq+1;	/* Last byte */
			
			if(after(new_seq,shut_seq))
			{
				if(sk->debug)
					printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n",
						sk, new_seq, shut_seq, sk->blog);
				if(sk->dead)
				{
					sk->acked_seq = new_seq + th->fin;
					tcp_reset(sk->saddr, sk->daddr, skb->h.th,
						sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
					tcp_statistics.TcpEstabResets++;
					tcp_set_state(sk,TCP_CLOSE);
					sk->err = EPIPE;
					sk->shutdown = SHUTDOWN_MASK;
					kfree_skb(skb, FREE_READ);
					return 0;
				}
			}
		}
	}

#endif

	/*
	 * 	Now we have to walk the chain, and figure out where this one
	 * 	goes into it.  This is set up so that the last packet we received
	 * 	will be the first one we look at, that way if everything comes
	 * 	in order, there will be no performance loss, and if they come
	 * 	out of order we will be able to fit things in nicely.
	 *
	 *	[AC: This is wrong. We should assume in order first and then walk
	 *	 forwards from the first hole based upon real traffic patterns.]
	 *	
	 */

	if (skb_peek(&sk->receive_queue) == NULL) 	/* Empty queue is easy case */
	{
		skb_queue_head(&sk->receive_queue,skb);
		skb1= NULL;
	} 
	else
	{
		for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
		{
			if(sk->debug)
			{
				printk("skb1=%p :", skb1);
				printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
				printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
				printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
						sk->acked_seq);
			}
			
			/*
			 *	Optimisation: Duplicate frame or extension of previous frame from
			 *	same sequence point (lost ack case).
			 *	The frame contains duplicate data or replaces a previous frame
			 *	discard the previous frame (safe as sk->inuse is set) and put
			 *	the new one in its place.
			 */
			 
			if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
			{
				skb_append(skb1,skb);
				skb_unlink(skb1);
				kfree_skb(skb1,FREE_READ);
				dup_dumped=1;
				skb1=NULL;
				break;
			}
			
			/*
			 *	Found where it fits
			 */
			 
			if (after(th->seq+1, skb1->h.th->seq))
			{
				skb_append(skb1,skb);
				break;
			}
			
			/*
			 *	See if we've hit the start. If so insert.
			 */
			if (skb1 == skb_peek(&sk->receive_queue))
			{
				skb_queue_head(&sk->receive_queue, skb);
				break;
			}
		}
  	}

	/*
	 *	Figure out what the ack value for this frame is
	 */
	 
 	th->ack_seq = th->seq + skb->len;
 	if (th->syn) 
 		th->ack_seq++;
 	if (th->fin)
 		th->ack_seq++;

	if (before(sk->acked_seq, sk->copied_seq)) 
	{
		printk("*** tcp.c:tcp_data bug acked < copied\n");
		sk->acked_seq = sk->copied_seq;
	}

	/*
	 *	Now figure out if we can ack anything. This is very messy because we really want two
	 *	receive queues, a completed and an assembly queue. We also want only one transmit
	 *	queue.
	 */

	if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
	{
		if (before(th->seq, sk->acked_seq+1)) 
		{
			int newwindow;

			if (after(th->ack_seq, sk->acked_seq)) 
			{
				newwindow = sk->window-(th->ack_seq - sk->acked_seq);
				if (newwindow < 0)
					newwindow = 0;	
				sk->window = newwindow;
				sk->acked_seq = th->ack_seq;
			}
			skb->acked = 1;

			/*
			 *	When we ack the fin, we do the FIN 
			 *	processing.
			 */

			if (skb->h.th->fin) 
			{
				tcp_fin(skb,sk,skb->h.th);
			}
	  
			for(skb2 = skb->next;
			    skb2 != (struct sk_buff *)&sk->receive_queue;
			    skb2 = skb2->next) 
			{
				if (before(skb2->h.th->seq, sk->acked_seq+1)) 
				{
					if (after(skb2->h.th->ack_seq, sk->acked_seq))
					{
						newwindow = sk->window -
						 (skb2->h.th->ack_seq - sk->acked_seq);
						if (newwindow < 0)
							newwindow = 0;	
						sk->window = newwindow;
						sk->acked_seq = skb2->h.th->ack_seq;
					}
					skb2->acked = 1;
					/*
					 * 	When we ack the fin, we do
					 * 	the fin handling.
					 */
					if (skb2->h.th->fin) 
					{
						tcp_fin(skb,sk,skb->h.th);
					}

					/*
					 *	Force an immediate ack.
					 */
					 
					sk->ack_backlog = sk->max_ack_backlog;
				}
				else
				{
					break;
				}
			}

			/*
			 *	This also takes care of updating the window.
			 *	This if statement needs to be simplified.
			 */
			if (!sk->delay_acks ||
			    sk->ack_backlog >= sk->max_ack_backlog || 
			    sk->bytes_rcv > sk->max_unacked || th->fin) {
	/*			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
			}
			else 
			{
				sk->ack_backlog++;
				if(sk->debug)
					printk("Ack queued.\n");
				reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
			}
		}
	}

	/*
	 *	If we've missed a packet, send an ack.
	 *	Also start a timer to send another.
	 */
	 
	if (!skb->acked) 
	{
	
	/*
	 *	This is important.  If we don't have much room left,
	 *	we need to throw out a few packets so we have a good
	 *	window.  Note that mtu is used, not mss, because mss is really
	 *	for the send side.  He could be sending us stuff as large as mtu.
	 */
		 
		while (sk->prot->rspace(sk) < sk->mtu) 
		{
			skb1 = skb_peek(&sk->receive_queue);
			if (skb1 == NULL) 
			{
				printk("INET: tcp.c:tcp_data memory leak detected.\n");
				break;
			}

			/*
			 *	Don't throw out something that has been acked. 
			 */
		 
			if (skb1->acked) 
			{
				break;
			}
		
			skb_unlink(skb1);
			kfree_skb(skb1, FREE_READ);
		}
		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
		sk->ack_backlog++;
		reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
	}
	else
	{
		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
	}

	/*
	 *	Now tell the user we may have some data. 
	 */
	 
	if (!sk->dead) 
	{
        	if(sk->debug)
        		printk("Data wakeup.\n");
		sk->data_ready(sk,0);
	} 
	return(0);
}


/*
 *	This routine is only called when we have urgent data
 *	signalled. Its the 'slow' part of tcp_urg. It could be
 *	moved inline now as tcp_urg is only called from one
 *	place. We handle URGent data wrong. We have to - as
 *	BSD still doesn't use the correction from RFC961.
 */
 
static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
{
	unsigned long ptr = ntohs(th->urg_ptr);

	if (ptr)
		ptr--;
	ptr += th->seq;

	/* ignore urgent data that we've already seen and read */
	if (after(sk->copied_seq, ptr))
		return;

	/* do we already have a newer (or duplicate) urgent pointer? */
	if (sk->urg_data && !after(ptr, sk->urg_seq))
		return;

	/* tell the world about our new urgent pointer */
	if (sk->proc != 0) {
		if (sk->proc > 0) {
			kill_proc(sk->proc, SIGURG, 1);
		} else {
			kill_pg(-sk->proc, SIGURG, 1);
		}
	}
	sk->urg_data = URG_NOTYET;
	sk->urg_seq = ptr;
}

/*
 *	This is the 'fast' part of urgent handling.
 */
 
extern __inline__ int tcp_urg(struct sock *sk, struct tcphdr *th,
	unsigned long saddr, unsigned long len)
{
	unsigned long ptr;

	/*
	 *	Check if we get a new urgent pointer - normally not 
	 */
	 
	if (th->urg)
		tcp_check_urg(sk,th);

	/*
	 *	Do we wait for any urgent data? - normally not
	 */
	 
	if (sk->urg_data != URG_NOTYET)
		return 0;

	/*
	 *	Is the urgent pointer pointing into this packet? 
	 */
	 
	ptr = sk->urg_seq - th->seq + th->doff*4;
	if (ptr >= len)
		return 0;

	/*
	 *	Ok, got the correct packet, update info 
	 */
	 
	sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
	if (!sk->dead)
		sk->data_ready(sk,0);
	return 0;
}

/*
 *	This will accept the next outstanding connection. 
 */
 
static struct sock *tcp_accept(struct sock *sk, int flags)
{
	struct sock *newsk;
	struct sk_buff *skb;
  
  /*
   * We need to make sure that this socket is listening,
   * and that it has something pending.
   */

	if (sk->state != TCP_LISTEN) 
	{
		sk->err = EINVAL;
		return(NULL); 
	}

	/* Avoid the race. */
	cli();
	sk->inuse = 1;

	while((skb = tcp_dequeue_established(sk)) == NULL) 
	{
		if (flags & O_NONBLOCK) 
		{
			sti();
			release_sock(sk);
			sk->err = EAGAIN;
			return(NULL);
		}

		release_sock(sk);
		interruptible_sleep_on(sk->sleep);
		if (current->signal & ~current->blocked) 
		{
			sti();
			sk->err = ERESTARTSYS;
			return(NULL);
		}
		sk->inuse = 1;
  	}
	sti();

	/*
	 *	Now all we need to do is return skb->sk. 
	 */

	newsk = skb->sk;

	kfree_skb(skb, FREE_READ);
	sk->ack_backlog--;
	release_sock(sk);
	return(newsk);
}


/*
 *	This will initiate an outgoing connection. 
 */
 
static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
{
	struct sk_buff *buff;
	struct device *dev=NULL;
	unsigned char *ptr;
	int tmp;
	int atype;
	struct tcphdr *t1;
	struct rtable *rt;

	if (sk->state != TCP_CLOSE) 
	{
		return(-EISCONN);
	}
	
	if (addr_len < 8) 
		return(-EINVAL);

	if (usin->sin_family && usin->sin_family != AF_INET) 
		return(-EAFNOSUPPORT);

  	/*
  	 *	connect() to INADDR_ANY means loopback (BSD'ism).
  	 */
  	
  	if(usin->sin_addr.s_addr==INADDR_ANY)
		usin->sin_addr.s_addr=ip_my_addr();
		  
	/*
	 *	Don't want a TCP connection going to a broadcast address 
	 */

	if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
		return -ENETUNREACH;
  
	sk->inuse = 1;
	sk->daddr = usin->sin_addr.s_addr;
	sk->write_seq = tcp_init_seq();
	sk->window_seq = sk->write_seq;
	sk->rcv_ack_seq = sk->write_seq -1;
	sk->err = 0;
	sk->dummy_th.dest = usin->sin_port;
	release_sock(sk);

	buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
	if (buff == NULL) 
	{
		return(-ENOMEM);
	}
	sk->inuse = 1;
	buff->len = 24;
	buff->sk = sk;
	buff->free = 0;
	buff->localroute = sk->localroute;
	
	t1 = (struct tcphdr *) buff->data;

	/*
	 *	Put in the IP header and routing stuff. 
	 */
	 
	rt=ip_rt_route(sk->daddr, NULL, NULL);
	

	/*
	 *	We need to build the routing stuff from the things saved in skb. 
	 */

	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
					IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
	if (tmp < 0) 
	{
		sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
		release_sock(sk);
		return(-ENETUNREACH);
	}

	buff->len += tmp;
	t1 = (struct tcphdr *)((char *)t1 +tmp);

	memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
	t1->seq = ntohl(sk->write_seq++);
	sk->sent_seq = sk->write_seq;
	buff->h.seq = sk->write_seq;
	t1->ack = 0;
	t1->window = 2;
	t1->res1=0;
	t1->res2=0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->urg_ptr = 0;
	t1->doff = 6;
	/* use 512 or whatever user asked for */
	
	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		sk->window_clamp=rt->rt_window;
	else
		sk->window_clamp=0;

	if (sk->user_mss)
		sk->mtu = sk->user_mss;
	else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
		sk->mtu = rt->rt_mss;
	else 
	{
#ifdef CONFIG_INET_SNARL
		if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
#else
		if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
#endif
			sk->mtu = 576 - HEADER_SIZE;
		else
			sk->mtu = MAX_WINDOW;
	}
	/*
	 *	but not bigger than device MTU 
	 */

	if(sk->mtu <32)
		sk->mtu = 32;	/* Sanity limit */
		
	sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
	
	/*
	 *	Put in the TCP options to say MTU. 
	 */

	ptr = (unsigned char *)(t1+1);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = (sk->mtu) >> 8;
	ptr[3] = (sk->mtu) & 0xff;
	tcp_send_check(t1, sk->saddr, sk->daddr,
		  sizeof(struct tcphdr) + 4, sk);

	/*
	 *	This must go first otherwise a really quick response will get reset. 
	 */

	tcp_set_state(sk,TCP_SYN_SENT);
	sk->rto = TCP_TIMEOUT_INIT;
#if 0 /* we already did this */
	init_timer(&sk->retransmit_timer); 
#endif
	sk->retransmit_timer.function=&retransmit_timer;
	sk->retransmit_timer.data = (unsigned long)sk;
	reset_xmit_timer(sk, TIME_WRITE, sk->rto);	/* Timer for repeating the SYN until an answer */
	sk->retransmits = TCP_SYN_RETRIES;

	sk->prot->queue_xmit(sk, dev, buff, 0);  
	reset_xmit_timer(sk, TIME_WRITE, sk->rto);
	tcp_statistics.TcpActiveOpens++;
	tcp_statistics.TcpOutSegs++;
  
	release_sock(sk);
	return(0);
}


/* This functions checks to see if the tcp header is actually acceptable. */
extern __inline__ int tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
	     struct options *opt, unsigned long saddr, struct device *dev)
{
	unsigned long next_seq;

	next_seq = len - 4*th->doff;
	if (th->fin)
		next_seq++;
	/* if we have a zero window, we can't have any data in the packet.. */
	if (next_seq && !sk->window)
		goto ignore_it;
	next_seq += th->seq;

	/*
	 * This isn't quite right.  sk->acked_seq could be more recent
	 * than sk->window.  This is however close enough.  We will accept
	 * slightly more packets than we should, but it should not cause
	 * problems unless someone is trying to forge packets.
	 */

	/* have we already seen all of this packet? */
	if (!after(next_seq+1, sk->acked_seq))
		goto ignore_it;
	/* or does it start beyond the window? */
	if (!before(th->seq, sk->acked_seq + sk->window + 1))
		goto ignore_it;

	/* ok, at least part of this packet would seem interesting.. */
	return 1;

ignore_it:
	if (th->rst)
		return 0;

	/*
	 *	Send a reset if we get something not ours and we are
	 *	unsynchronized. Note: We don't do anything to our end. We
	 *	are just killing the bogus remote connection then we will
	 *	connect again and it will work (with luck).
	 */
  	 
	if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) 
	{
		tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
		return 1;
	}

	/* Try to resync things. */
	tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
	return 0;
}

/*
 *	When we get a reset we do this.
 */

static int tcp_std_reset(struct sock *sk, struct sk_buff *skb)
{
	sk->zapped = 1;
	sk->err = ECONNRESET;
	if (sk->state == TCP_SYN_SENT)
		sk->err = ECONNREFUSED;
	if (sk->state == TCP_CLOSE_WAIT)
		sk->err = EPIPE;
#ifdef TCP_DO_RFC1337		
	/*
	 *	Time wait assassination protection [RFC1337]
	 */
	if(sk->state!=TCP_TIME_WAIT)
	{	
		tcp_set_state(sk,TCP_CLOSE);
		sk->shutdown = SHUTDOWN_MASK;
	}
#else	
	tcp_set_state(sk,TCP_CLOSE);
	sk->shutdown = SHUTDOWN_MASK;
#endif	
	if (!sk->dead) 
		sk->state_change(sk);
	kfree_skb(skb, FREE_READ);
	release_sock(sk);
	return(0);
}

/*
 *	A TCP packet has arrived.
 */
 
int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
	unsigned long daddr, unsigned short len,
	unsigned long saddr, int redo, struct inet_protocol * protocol)
{
	struct tcphdr *th;
	struct sock *sk;
	int syn_ok=0;
	
	if (!skb) 
	{
		printk("IMPOSSIBLE 1\n");
		return(0);
	}

	if (!dev) 
	{
		printk("IMPOSSIBLE 2\n");
		return(0);
	}
  
	tcp_statistics.TcpInSegs++;
  
	if(skb->pkt_type!=PACKET_HOST)
	{
	  	kfree_skb(skb,FREE_READ);
	  	return(0);
	}
  
	th = skb->h.th;

	/*
	 *	Find the socket.
	 */

	sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);

	/*
	 *	If this socket has got a reset it's to all intents and purposes 
  	 *	really dead. Count closed sockets as dead.
  	 *
  	 *	Note: BSD appears to have a bug here. A 'closed' TCP in BSD
  	 *	simply drops data. This seems incorrect as a 'closed' TCP doesn't
  	 *	exist so should cause resets as if the port was unreachable.
  	 */
  	 
	if (sk!=NULL && (sk->zapped || sk->state==TCP_CLOSE))
		sk=NULL;

	if (!redo) 
	{
		if (tcp_check(th, len, saddr, daddr )) 
		{
			skb->sk = NULL;
			kfree_skb(skb,FREE_READ);
			/*
			 *	We don't release the socket because it was
			 *	never marked in use.
			 */
			return(0);
		}
		th->seq = ntohl(th->seq);

		/* See if we know about the socket. */
		if (sk == NULL) 
		{
			/*
			 *	No such TCB. If th->rst is 0 send a reset (checked in tcp_reset)
			 */
			tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
			skb->sk = NULL;
			/*
			 *	Discard frame
			 */
			kfree_skb(skb, FREE_READ);
			return(0);
		}

		skb->len = len;
		skb->acked = 0;
		skb->used = 0;
		skb->free = 0;
		skb->saddr = daddr;
		skb->daddr = saddr;
	
		/* We may need to add it to the backlog here. */
		cli();
		if (sk->inuse) 
		{
			skb_queue_tail(&sk->back_log, skb);
			sti();
			return(0);
		}
		sk->inuse = 1;
		sti();
	}
	else
	{
		if (sk==NULL) 
		{
			tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
			skb->sk = NULL;
			kfree_skb(skb, FREE_READ);
			return(0);
		}
	}


	if (!sk->prot) 
	{
		printk("IMPOSSIBLE 3\n");
		return(0);
	}


	/*
	 *	Charge the memory to the socket. 
	 */
	 
	if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
	{
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return(0);
	}

	skb->sk=sk;
	sk->rmem_alloc += skb->mem_len;

	/*
	 *	This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
	 *	don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
	 *	compatibility. We also set up variables more thoroughly [Karn notes in the
	 *	KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
	 */

	if(sk->state!=TCP_ESTABLISHED)		/* Skip this lot for normal flow */
	{
	
		/*
		 *	Now deal with unusual cases.
		 */
	 
		if(sk->state==TCP_LISTEN)
		{
			if(th->ack)	/* These use the socket TOS.. might want to be the received TOS */
				tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);

			/*
			 *	We don't care for RST, and non SYN are absorbed (old segments)
			 *	Broadcast/multicast SYN isn't allowed. Note - bug if you change the
			 *	netmask on a running connection it can go broadcast. Even Sun's have
			 *	this problem so I'm ignoring it 
			 */
			   
			if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
			{
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return 0;
			}
		
			/*	
			 *	Guess we need to make a new socket up 
			 */
		
			tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
		
			/*
			 *	Now we have several options: In theory there is nothing else
			 *	in the frame. KA9Q has an option to send data with the syn,
			 *	BSD accepts data with the syn up to the [to be] advertised window
			 *	and Solaris 2.1 gives you a protocol error. For now we just ignore
			 *	it, that fits the spec precisely and avoids incompatibilities. It
			 *	would be nice in future to drop through and process the data.
			 */
			 
			release_sock(sk);
			return 0;
		}
	
		/* retransmitted SYN? */
		if (sk->state == TCP_SYN_RECV && th->syn && th->seq+1 == sk->acked_seq)
		{
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return 0;
		}
		
		/*
		 *	SYN sent means we have to look for a suitable ack and either reset
		 *	for bad matches or go to connected 
		 */
	   
		if(sk->state==TCP_SYN_SENT)
		{
			/* Crossed SYN or previous junk segment */
			if(th->ack)
			{
				/* We got an ack, but it's not a good ack */
				if(!tcp_ack(sk,th,saddr,len))
				{
					/* Reset the ack - its an ack from a 
					   different connection  [ th->rst is checked in tcp_reset()] */
					tcp_statistics.TcpAttemptFails++;
					tcp_reset(daddr, saddr, th,
						sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}
				if(th->rst)
					return tcp_std_reset(sk,skb);
				if(!th->syn)
				{
					/* A valid ack from a different connection
					   start. Shouldn't happen but cover it */
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return 0;
				}
				/*
				 *	Ok.. it's good. Set up sequence numbers and
				 *	move to established.
				 */
				syn_ok=1;	/* Don't reset this connection for the syn */
				sk->acked_seq=th->seq+1;
				sk->fin_seq=th->seq;
				tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr);
				tcp_set_state(sk, TCP_ESTABLISHED);
				tcp_options(sk,th);
				sk->dummy_th.dest=th->source;
				sk->copied_seq = sk->acked_seq;
				if(!sk->dead)
				{
					sk->state_change(sk);
					sock_wake_async(sk->socket, 0);
				}
				if(sk->max_window==0)
				{
					sk->max_window = 32;
					sk->mss = min(sk->max_window, sk->mtu);
				}
			}
			else
			{
				/* See if SYN's cross. Drop if boring */
				if(th->syn && !th->rst)
				{
					/* Crossed SYN's are fine - but talking to
					   yourself is right out... */
					if(sk->saddr==saddr && sk->daddr==daddr &&
						sk->dummy_th.source==th->source &&
						sk->dummy_th.dest==th->dest)
					{
						tcp_statistics.TcpAttemptFails++;
						return tcp_std_reset(sk,skb);
					}
					tcp_set_state(sk,TCP_SYN_RECV);
					
					/*
					 *	FIXME:
					 *	Must send SYN|ACK here
					 */
				}		
				/* Discard junk segment */
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return 0;
			}
			/*
			 *	SYN_RECV with data maybe.. drop through
			 */
			goto rfc_step6;
		}

	/*
	 *	BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
	 *	a more complex suggestion for fixing these reuse issues in RFC1644
	 *	but not yet ready for general use. Also see RFC1379.
	 */
	
#define BSD_TIME_WAIT
#ifdef BSD_TIME_WAIT
		if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && 
			after(th->seq, sk->acked_seq) && !th->rst)
		{
			long seq=sk->write_seq;
			if(sk->debug)
				printk("Doing a BSD time wait\n");
			tcp_statistics.TcpEstabResets++;	   
			sk->rmem_alloc -= skb->mem_len;
			skb->sk = NULL;
			sk->err=ECONNRESET;
			tcp_set_state(sk, TCP_CLOSE);
			sk->shutdown = SHUTDOWN_MASK;
			release_sock(sk);
			sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
			if (sk && sk->state==TCP_LISTEN)
			{
				sk->inuse=1;
				skb->sk = sk;
				sk->rmem_alloc += skb->mem_len;
				tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
				release_sock(sk);
				return 0;
			}
			kfree_skb(skb, FREE_READ);
			return 0;
		}
#endif	
	}

	/*
	 *	We are now in normal data flow (see the step list in the RFC)
	 *	Note most of these are inline now. I'll inline the lot when
	 *	I have time to test it hard and look at what gcc outputs 
	 */
	
	if(!tcp_sequence(sk,th,len,opt,saddr,dev))
	{
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return 0;
	}

	if(th->rst)
		return tcp_std_reset(sk,skb);
	
	/*
	 *	!syn_ok is effectively the state test in RFC793.
	 */
	 
	if(th->syn && !syn_ok)
	{
		tcp_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255);
		return tcp_std_reset(sk,skb);	
	}

	/*
	 *	Process the ACK
	 */
	 

	if(th->ack && !tcp_ack(sk,th,saddr,len))
	{
		/*
		 *	Our three way handshake failed.
		 */
		 
		if(sk->state==TCP_SYN_RECV)
		{
			tcp_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
		}
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return 0;
	}
	
rfc_step6:		/* I'll clean this up later */

	/*
	 *	Process urgent data
	 */
	 	
	if(tcp_urg(sk, th, saddr, len))
	{
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return 0;
	}
	
	
	/*
	 *	Process the encapsulated data
	 */
	
	if(tcp_data(skb,sk, saddr, len))
	{
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return 0;
	}

	/*
	 *	And done
	 */	
	
	release_sock(sk);
	return 0;
}

/*
 *	This routine sends a packet with an out of date sequence
 *	number. It assumes the other end will try to ack it.
 */

static void tcp_write_wakeup(struct sock *sk)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct device *dev=NULL;
	int tmp;

	if (sk->zapped)
		return;	/* After a valid reset we can send no more */

	/*
	 *	Write data can still be transmitted/retransmitted in the
	 *	following states.  If any other state is encountered, return.
	 *	[listen/close will never occur here anyway]
	 */

	if (sk->state != TCP_ESTABLISHED && 
	    sk->state != TCP_CLOSE_WAIT &&
	    sk->state != TCP_FIN_WAIT1 && 
	    sk->state != TCP_LAST_ACK &&
	    sk->state != TCP_CLOSING
	) 
	{
		return;
	}

	buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
	if (buff == NULL) 
		return;

	buff->len = sizeof(struct tcphdr);
	buff->free = 1;
	buff->sk = sk;
	buff->localroute = sk->localroute;

	t1 = (struct tcphdr *) buff->data;

	/* Put in the IP header and routing stuff. */
	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
				IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (tmp < 0) 
	{
		sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
		return;
	}

	buff->len += tmp;
	t1 = (struct tcphdr *)((char *)t1 +tmp);

	memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));

	/*
	 *	Use a previous sequence.
	 *	This should cause the other end to send an ack.
	 */
	 
	t1->seq = htonl(sk->sent_seq-1);
	t1->ack = 1; 
	t1->res1= 0;
	t1->res2= 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->fin = 0;	/* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
	t1->syn = 0;
	t1->ack_seq = ntohl(sk->acked_seq);
	t1->window = ntohs(tcp_select_window(sk));
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
	 /*
	  *	Send it and free it.
   	  *	This will prevent the timer from automatically being restarted.
	  */
	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}

/*
 *	A window probe timeout has occurred.
 */

void tcp_send_probe0(struct sock *sk)
{
	if (sk->zapped)
		return;		/* After a valid reset we can send no more */

	tcp_write_wakeup(sk);

	sk->backoff++;
	sk->rto = min(sk->rto << 1, 120*HZ);
	reset_xmit_timer (sk, TIME_PROBE0, sk->rto);
	sk->retransmits++;
	sk->prot->retransmits ++;
}

/*
 *	Socket option code for TCP. 
 */
  
int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
	int val,err;

	if(level!=SOL_TCP)
		return ip_setsockopt(sk,level,optname,optval,optlen);

  	if (optval == NULL) 
  		return(-EINVAL);

  	err=verify_area(VERIFY_READ, optval, sizeof(int));
  	if(err)
  		return err;
  	
  	val = get_fs_long((unsigned long *)optval);

	switch(optname)
	{
		case TCP_MAXSEG:
/*
 * values greater than interface MTU won't take effect.  however at
 * the point when this call is done we typically don't yet know
 * which interface is going to be used
 */
	  		if(val<1||val>MAX_WINDOW)
				return -EINVAL;
			sk->user_mss=val;
			return 0;
		case TCP_NODELAY:
			sk->nonagle=(val==0)?0:1;
			return 0;
		default:
			return(-ENOPROTOOPT);
	}
}

int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
	int val,err;

	if(level!=SOL_TCP)
		return ip_getsockopt(sk,level,optname,optval,optlen);
			
	switch(optname)
	{
		case TCP_MAXSEG:
			val=sk->user_mss;
			break;
		case TCP_NODELAY:
			val=sk->nonagle;
			break;
		default:
			return(-ENOPROTOOPT);
	}
	err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
	if(err)
  		return err;
  	put_fs_long(sizeof(int),(unsigned long *) optlen);

  	err=verify_area(VERIFY_WRITE, optval, sizeof(int));
  	if(err)
  		return err;
  	put_fs_long(val,(unsigned long *)optval);

  	return(0);
}	


struct proto tcp_prot = {
	sock_wmalloc,
	sock_rmalloc,
	sock_wfree,
	sock_rfree,
	sock_rspace,
	sock_wspace,
	tcp_close,
	tcp_read,
	tcp_write,
	tcp_sendto,
	tcp_recvfrom,
	ip_build_header,
	tcp_connect,
	tcp_accept,
	ip_queue_xmit,
	tcp_retransmit,
	tcp_write_wakeup,
	tcp_read_wakeup,
	tcp_rcv,
	tcp_select,
	tcp_ioctl,
	NULL,
	tcp_shutdown,
	tcp_setsockopt,
	tcp_getsockopt,
	128,
	0,
	{NULL,},
	"TCP",
	0, 0
};