Merge branch 'release' of ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6:
  [IA64] build fix for scatterlist
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 63df226..fb8258e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -205,20 +205,6 @@
 
 ---------------------------
 
-What:	Compaq touchscreen device emulation
-When:	Oct 2007
-Files:	drivers/input/tsdev.c
-Why:	The code says it was obsolete when it was written in 2001.
-	tslib is a userspace library which does anything tsdev can do and
-	much more besides in userspace where this code belongs. There is no
-	longer any need for tsdev and applications should have converted to
-	use tslib by now.
-	The name "tsdev" is also extremely confusing and lots of people have
-	it loaded when they don't need/use it.
-Who:	Richard Purdie <rpurdie@rpsys.net>
-
----------------------------
-
 What:	i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
 When:	September 2007
 Why:	Obsolete. The new i2c-gpio driver replaces all hardware-specific
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c323778..085e4a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1083,6 +1083,13 @@
 			[NFS] set the maximum lifetime for idmapper cache
 			entries.
 
+	nfs.enable_ino64=
+			[NFS] enable 64-bit inode numbers.
+			If zero, the NFS client will fake up a 32-bit inode
+			number for the readdir() and stat() syscalls instead
+			of returning the full 64-bit number.
+			The default is to return 64-bit inode numbers.
+
 	nmi_watchdog=	[KNL,BUGS=X86-32] Debugging features for SMP kernels
 
 	no387		[BUGS=X86-32] Tells the kernel to use the 387 maths
@@ -1883,9 +1890,6 @@
 			Format:
 			<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
 
-	tsdev.xres=	[TS] Horizontal screen resolution.
-	tsdev.yres=	[TS] Vertical screen resolution.
-
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
 			Format:
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 1da5666..1134062 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -281,6 +281,39 @@
 	will be rounded down to the nearest multiple.  The default
 	value is 0.
 
+fail_over_mac
+
+	Specifies whether active-backup mode should set all slaves to
+	the same MAC address (the traditional behavior), or, when
+	enabled, change the bond's MAC address when changing the
+	active interface (i.e., fail over the MAC address itself).
+
+	Fail over MAC is useful for devices that cannot ever alter
+	their MAC address, or for devices that refuse incoming
+	broadcasts with their own source MAC (which interferes with
+	the ARP monitor).
+
+	The down side of fail over MAC is that every device on the
+	network must be updated via gratuitous ARP, vs. just updating
+	a switch or set of switches (which often takes place for any
+	traffic, not just ARP traffic, if the switch snoops incoming
+	traffic to update its tables) for the traditional method.  If
+	the gratuitous ARP is lost, communication may be disrupted.
+
+	When fail over MAC is used in conjuction with the mii monitor,
+	devices which assert link up prior to being able to actually
+	transmit and receive are particularly susecptible to loss of
+	the gratuitous ARP, and an appropriate updelay setting may be
+	required.
+
+	A value of 0 disables fail over MAC, and is the default.  A
+	value of 1 enables fail over MAC.  This option is enabled
+	automatically if the first slave added cannot change its MAC
+	address.  This option may be modified via sysfs only when no
+	slaves are present in the bond.
+
+	This option was added in bonding version 3.2.0.
+
 lacp_rate
 
 	Option specifying the rate in which we'll ask our link partner
diff --git a/Documentation/networking/proc_net_tcp.txt b/Documentation/networking/proc_net_tcp.txt
index 5e21f7c..4a79209 100644
--- a/Documentation/networking/proc_net_tcp.txt
+++ b/Documentation/networking/proc_net_tcp.txt
@@ -1,8 +1,9 @@
 This document describes the interfaces /proc/net/tcp and /proc/net/tcp6.
+Note that these interfaces are deprecated in favor of tcp_diag.
 
 These /proc interfaces provide information about currently active TCP 
-connections, and are implemented by tcp_get_info() in net/ipv4/tcp_ipv4.c and
-tcp6_get_info() in net/ipv6/tcp_ipv6.c, respectively.
+connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c
+and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively.
 
 It will first list all listening TCP sockets, and next list all established
 TCP connections. A typical entry of /proc/net/tcp would look like this (split 
diff --git a/MAINTAINERS b/MAINTAINERS
index 12cee3d..c7355e7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2404,6 +2404,15 @@
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 
+LOCKDEP AND LOCKSTAT
+P:	Peter Zijlstra
+M:	peterz@infradead.org
+P:	Ingo Molnar
+M:	mingo@redhat.com
+L:	linux-kernel@vger.kernel.org
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git
+S:	Maintained
+
 LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks)
 P:	Richard Russon (FlatCap)
 M:	ldm@flatcap.org
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 2c47db4..046e6d8 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -88,7 +88,7 @@
 #endif
 
 #if defined(CONFIG_KEYBOARD_BFIN) || defined(CONFIG_KEYBOARD_BFIN_MODULE)
-static int bf548_keymap[] = {
+static const unsigned int bf548_keymap[] = {
 	KEYVAL(0, 0, KEY_ENTER),
 	KEYVAL(0, 1, KEY_HELP),
 	KEYVAL(0, 2, KEY_0),
@@ -110,8 +110,8 @@
 static struct bfin_kpad_platform_data bf54x_kpad_data = {
 	.rows			= 4,
 	.cols			= 4,
-	.keymap 		= bf548_keymap,
-	.keymapsize 		= ARRAY_SIZE(bf548_keymap),
+	.keymap			= bf548_keymap,
+	.keymapsize		= ARRAY_SIZE(bf548_keymap),
 	.repeat			= 0,
 	.debounce_time		= 5000,	/* ns (5ms) */
 	.coldrive_time		= 1000, /* ns (1ms) */
diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index fbbccb5..880add1 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -1,6 +1,4 @@
 /*
- * linux/arch/m68k/atari/atakeyb.c
- *
  * Atari Keyboard driver for 680x0 Linux
  *
  * This file is subject to the terms and conditions of the GNU General Public
diff --git a/arch/mips/au1000/common/prom.c b/arch/mips/au1000/common/prom.c
index a8637cd..90d7069 100644
--- a/arch/mips/au1000/common/prom.c
+++ b/arch/mips/au1000/common/prom.c
@@ -33,7 +33,6 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -41,18 +40,16 @@
 
 #include <asm/bootinfo.h>
 
-/* #define DEBUG_CMDLINE */
-
-extern int prom_argc;
-extern char **prom_argv, **prom_envp;
-
+int prom_argc;
+char **prom_argv;
+char **prom_envp;
 
 char * __init_or_module prom_getcmdline(void)
 {
 	return &(arcs_cmdline[0]);
 }
 
-void  prom_init_cmdline(void)
+void prom_init_cmdline(void)
 {
 	char *cp;
 	int actr;
@@ -61,7 +58,7 @@
 
 	cp = &(arcs_cmdline[0]);
 	while(actr < prom_argc) {
-	        strcpy(cp, prom_argv[actr]);
+		strcpy(cp, prom_argv[actr]);
 		cp += strlen(prom_argv[actr]);
 		*cp++ = ' ';
 		actr++;
@@ -70,10 +67,8 @@
 		--cp;
 	if (prom_argc > 1)
 		*cp = '\0';
-
 }
 
-
 char *prom_getenv(char *envname)
 {
 	/*
@@ -95,21 +90,23 @@
 		}
 		env++;
 	}
+
 	return NULL;
 }
 
-inline unsigned char str2hexnum(unsigned char c)
+static inline unsigned char str2hexnum(unsigned char c)
 {
-	if(c >= '0' && c <= '9')
+	if (c >= '0' && c <= '9')
 		return c - '0';
-	if(c >= 'a' && c <= 'f')
+	if (c >= 'a' && c <= 'f')
 		return c - 'a' + 10;
-	if(c >= 'A' && c <= 'F')
+	if (c >= 'A' && c <= 'F')
 		return c - 'A' + 10;
+
 	return 0; /* foo */
 }
 
-inline void str2eaddr(unsigned char *ea, unsigned char *str)
+static inline void str2eaddr(unsigned char *ea, unsigned char *str)
 {
 	int i;
 
@@ -124,35 +121,29 @@
 	}
 }
 
-int get_ethernet_addr(char *ethernet_addr)
+int prom_get_ethernet_addr(char *ethernet_addr)
 {
-        char *ethaddr_str;
+	char *ethaddr_str;
+	char *argptr;
 
-        ethaddr_str = prom_getenv("ethaddr");
+	/* Check the environment variables first */
+	ethaddr_str = prom_getenv("ethaddr");
 	if (!ethaddr_str) {
-	        printk("ethaddr not set in boot prom\n");
-		return -1;
+		/* Check command line */
+		argptr = prom_getcmdline();
+		ethaddr_str = strstr(argptr, "ethaddr=");
+		if (!ethaddr_str)
+			return -1;
+
+		ethaddr_str += strlen("ethaddr=");
 	}
+
 	str2eaddr(ethernet_addr, ethaddr_str);
 
-#if 0
-	{
-		int i;
-
-	printk("get_ethernet_addr: ");
-	for (i=0; i<5; i++)
-		printk("%02x:", (unsigned char)*(ethernet_addr+i));
-	printk("%02x\n", *(ethernet_addr+i));
-	}
-#endif
-
 	return 0;
 }
+EXPORT_SYMBOL(prom_get_ethernet_addr);
 
 void __init prom_free_prom_memory(void)
 {
 }
-
-EXPORT_SYMBOL(prom_getcmdline);
-EXPORT_SYMBOL(get_ethernet_addr);
-EXPORT_SYMBOL(str2eaddr);
diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c
index b212c07..a90d425 100644
--- a/arch/mips/au1000/common/setup.c
+++ b/arch/mips/au1000/common/setup.c
@@ -40,10 +40,11 @@
 #include <asm/mipsregs.h>
 #include <asm/reboot.h>
 #include <asm/pgtable.h>
-#include <asm/mach-au1x00/au1000.h>
 #include <asm/time.h>
 
-extern char * prom_getcmdline(void);
+#include <au1000.h>
+#include <prom.h>
+
 extern void __init board_setup(void);
 extern void au1000_restart(char *);
 extern void au1000_halt(void);
diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c
index 4d7bcfc..43298fd 100644
--- a/arch/mips/au1000/db1x00/init.c
+++ b/arch/mips/au1000/db1x00/init.c
@@ -31,15 +31,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c
index 2aa7b2e..cdeae32 100644
--- a/arch/mips/au1000/mtx-1/init.c
+++ b/arch/mips/au1000/mtx-1/init.c
@@ -34,13 +34,11 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
+
 #include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c
index 4535f72..ddccaf6 100644
--- a/arch/mips/au1000/pb1000/init.c
+++ b/arch/mips/au1000/pb1000/init.c
@@ -30,15 +30,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c
index 7ba6852..c93fd39 100644
--- a/arch/mips/au1000/pb1100/init.c
+++ b/arch/mips/au1000/pb1100/init.c
@@ -31,15 +31,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/pb1200/board_setup.c b/arch/mips/au1000/pb1200/board_setup.c
index 2122515..5dbc986 100644
--- a/arch/mips/au1000/pb1200/board_setup.c
+++ b/arch/mips/au1000/pb1200/board_setup.c
@@ -41,8 +41,10 @@
 #include <asm/mipsregs.h>
 #include <asm/reboot.h>
 #include <asm/pgtable.h>
-#include <asm/mach-au1x00/au1000.h>
-#include <asm/mach-au1x00/au1xxx_dbdma.h>
+
+#include <au1000.h>
+#include <au1xxx_dbdma.h>
+#include <prom.h>
 
 #ifdef CONFIG_MIPS_PB1200
 #include <asm/mach-pb1x00/pb1200.h>
diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c
index 5a70029..c251570 100644
--- a/arch/mips/au1000/pb1200/init.c
+++ b/arch/mips/au1000/pb1200/init.c
@@ -31,15 +31,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c
index e58a9d6..507d4b2 100644
--- a/arch/mips/au1000/pb1500/init.c
+++ b/arch/mips/au1000/pb1500/init.c
@@ -31,15 +31,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c
index fad53bf..b03eee6 100644
--- a/arch/mips/au1000/pb1550/init.c
+++ b/arch/mips/au1000/pb1550/init.c
@@ -31,15 +31,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c
index 9f839c3..6532939 100644
--- a/arch/mips/au1000/xxs1500/init.c
+++ b/arch/mips/au1000/xxs1500/init.c
@@ -30,15 +30,13 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void  __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
 
 const char *get_system_type(void)
 {
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 1245b2f..095988f 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -77,12 +77,7 @@
 {
 	pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
 
-	dcr_write(msic->dcr_host, msic->dcr_host.base + dcr_n, val);
-}
-
-static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n)
-{
-	return dcr_read(msic->dcr_host, msic->dcr_host.base + dcr_n);
+	dcr_write(msic->dcr_host, dcr_n, val);
 }
 
 static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
@@ -91,7 +86,7 @@
 	u32 write_offset, msi;
 	int idx;
 
-	write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG);
+	write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
 	pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
 
 	/* write_offset doesn't wrap properly, so we have to mask it */
@@ -306,7 +301,7 @@
 	list_for_each_entry(msic, &axon_msic_list, list) {
 		pr_debug("axon_msi: disabling %s\n",
 			  msic->irq_host->of_node->full_name);
-		tmp  = msic_dcr_read(msic, MSIC_CTRL_REG);
+		tmp  = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
 		tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
 		msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
 	}
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
index ab11c0b..427027c 100644
--- a/arch/powerpc/sysdev/dcr.c
+++ b/arch/powerpc/sysdev/dcr.c
@@ -126,13 +126,13 @@
 }
 EXPORT_SYMBOL_GPL(dcr_map);
 
-void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c)
+void dcr_unmap(dcr_host_t host, unsigned int dcr_c)
 {
 	dcr_host_t h = host;
 
 	if (h.token == NULL)
 		return;
-	h.token += dcr_n * h.stride;
+	h.token += host.base * h.stride;
 	iounmap(h.token);
 	h.token = NULL;
 }
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 893e654..e479388 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -156,7 +156,7 @@
 	switch(type) {
 #ifdef CONFIG_PPC_DCR
 	case mpic_access_dcr:
-		return dcr_read(rb->dhost, rb->dhost.base + reg);
+		return dcr_read(rb->dhost, reg);
 #endif
 	case mpic_access_mmio_be:
 		return in_be32(rb->base + (reg >> 2));
@@ -173,7 +173,7 @@
 	switch(type) {
 #ifdef CONFIG_PPC_DCR
 	case mpic_access_dcr:
-		return dcr_write(rb->dhost, rb->dhost.base + reg, value);
+		return dcr_write(rb->dhost, reg, value);
 #endif
 	case mpic_access_mmio_be:
 		return out_be32(rb->base + (reg >> 2), value);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f3bceb1..139ca15 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -68,9 +68,15 @@
 	l	%r1,BASED(.Ltrace_irq_off)
 	basr	%r14,%r1
 	.endm
+
+	.macro	LOCKDEP_SYS_EXIT
+	l	%r1,BASED(.Llockdep_sys_exit)
+	basr	%r14,%r1
+	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
 #endif
 
 /*
@@ -260,6 +266,7 @@
 	bno	BASED(sysc_leave)
 	tm	__TI_flags+3(%r9),_TIF_WORK_SVC
 	bnz	BASED(sysc_work)  # there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 
@@ -283,6 +290,7 @@
 	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r9),_TIF_SINGLE_STEP
 	bo	BASED(sysc_singlestep)
+	LOCKDEP_SYS_EXIT
 	b	BASED(sysc_leave)
 
 #
@@ -572,6 +580,7 @@
 #endif
 	tm	__TI_flags+3(%r9),_TIF_WORK_INT
 	bnz	BASED(io_work)		# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
@@ -618,6 +627,7 @@
 	bo	BASED(io_reschedule)
 	tm	__TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
 	bnz	BASED(io_sigpending)
+	LOCKDEP_SYS_EXIT
 	b	BASED(io_leave)
 
 #
@@ -1040,6 +1050,8 @@
 .Ltrace_irq_on: .long	trace_hardirqs_on
 .Ltrace_irq_off:
 		.long	trace_hardirqs_off
+.Llockdep_sys_exit:
+		.long	lockdep_sys_exit
 #endif
 .Lcritical_start:
 		.long	__critical_start + 0x80000000
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9c0d5cc..05e26d1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -66,9 +66,14 @@
 	.macro	TRACE_IRQS_OFF
 	 brasl	%r14,trace_hardirqs_off
 	.endm
+
+	.macro	LOCKDEP_SYS_EXIT
+	 brasl	%r14,lockdep_sys_exit
+	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
 #endif
 
 	.macro	STORE_TIMER lc_offset
@@ -255,6 +260,7 @@
 	jno	sysc_leave
 	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
 	jnz	sysc_work	# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 
@@ -278,6 +284,7 @@
 	jo	sysc_restart
 	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
+	LOCKDEP_SYS_EXIT
 	j	sysc_leave
 
 #
@@ -558,6 +565,7 @@
 #endif
 	tm	__TI_flags+7(%r9),_TIF_WORK_INT
 	jnz	io_work 		# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
@@ -605,6 +613,7 @@
 	jo	io_reschedule
 	tm	__TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
 	jnz	io_sigpending
+	LOCKDEP_SYS_EXIT
 	j	io_leave
 
 #
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 290b7bc..8099fea0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -251,6 +251,7 @@
 	jb resume_kernel		# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
+	LOCKDEP_SYS_EXIT
  	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -338,6 +339,7 @@
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
@@ -377,6 +379,7 @@
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -467,6 +470,7 @@
 	jz work_notifysig
 work_resched:
 	call schedule
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1d232e5..f1cacd4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -244,6 +244,7 @@
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
 sysret_check:		
+	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
 	cli
 	TRACE_IRQS_OFF
@@ -333,6 +334,7 @@
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
 int_with_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
@@ -544,11 +546,13 @@
 retint_with_reschedule:
 	movl $_TIF_WORK_MASK,%edi
 retint_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz  retint_careful
-retint_swapgs:	 	
+
+retint_swapgs:		/* return to user-space */
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
@@ -557,7 +561,7 @@
 	swapgs 
 	jmp restore_args
 
-retint_restore_args:				
+retint_restore_args:	/* return to kernel space */
 	cli
 	/*
 	 * The iretq could re-enable interrupts:
@@ -866,26 +870,21 @@
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */ 
 	movq $-1,ORIG_RAX(%rsp)
 	call *%rax
-	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */	 
-error_exit:		
-	movl %ebx,%eax		
+	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+error_exit:
+	movl %ebx,%eax
 	RESTORE_REST
 	cli
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)	
 	testl %eax,%eax
 	jne  retint_kernel
+	LOCKDEP_SYS_EXIT_IRQ
 	movl  threadinfo_flags(%rcx),%edx
 	movl  $_TIF_WORK_MASK,%edi
 	andl  %edi,%edx
 	jnz  retint_careful
-	/*
-	 * The iret might restore flags:
-	 */
-	TRACE_IRQS_IRETQ
-	swapgs 
-	RESTORE_ARGS 0,8,0						
-	jmp iret_label
+	jmp retint_swapgs
 	CFI_ENDPROC
 
 error_kernelspace:
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index c2d03e9..e7d0d3c 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -557,6 +557,12 @@
 
 	resume_execution(cur, regs, kcb);
 	regs->eflags |= kcb->kprobe_saved_eflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+	if (raw_irqs_disabled_flags(regs->eflags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+#endif
 
 	/*Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -694,6 +700,7 @@
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
+	trace_hardirqs_off();
 	regs->eip = (unsigned long)(jp->entry);
 	return 1;
 }
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 1df17a0..62e28e5 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -544,6 +544,12 @@
 
 	resume_execution(cur, regs, kcb);
 	regs->eflags |= kcb->kprobe_saved_rflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+	if (raw_irqs_disabled_flags(regs->eflags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+#endif
 
 	/* Restore the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -684,6 +690,7 @@
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
+	trace_hardirqs_off();
 	regs->rip = (unsigned long)(jp->entry);
 	return 1;
 }
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 55e586d..6ea73f3 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -50,6 +50,10 @@
 	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
 	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
 #endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	thunk lockdep_sys_exit_thunk,lockdep_sys_exit
+#endif
 	
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 4672066..33f5eb0 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -272,6 +272,15 @@
 
 	  If unsure, say N.
 
+config PATA_CS5536
+	tristate "CS5536 PATA support (Experimental)"
+	depends on PCI && X86 && !X86_64 && EXPERIMENTAL
+	help
+	  This option enables support for the AMD CS5536
+	  companion chip used with the Geode LX processor family.
+
+	  If unsure, say N.
+
 config PATA_CYPRESS
 	tristate "Cypress CY82C693 PATA support (Very Experimental)"
 	depends on PCI && EXPERIMENTAL
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 2a63645..6bdc307 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -28,6 +28,7 @@
 obj-$(CONFIG_PATA_CS5520)	+= pata_cs5520.o
 obj-$(CONFIG_PATA_CS5530)	+= pata_cs5530.o
 obj-$(CONFIG_PATA_CS5535)	+= pata_cs5535.o
+obj-$(CONFIG_PATA_CS5536)	+= pata_cs5536.o
 obj-$(CONFIG_PATA_CYPRESS)	+= pata_cypress.o
 obj-$(CONFIG_PATA_EFAR)		+= pata_efar.o
 obj-$(CONFIG_PATA_HPT366)	+= pata_hpt366.o
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 9ce4aa9..3c6f43e 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -130,6 +130,7 @@
 	ich8_sata_ahci		= 9,
 	piix_pata_mwdma		= 10,	/* PIIX3 MWDMA only */
 	tolapai_sata_ahci	= 11,
+	ich9_2port_sata		= 12,
 
 	/* constants for mapping table */
 	P0			= 0,  /* port 0 */
@@ -238,19 +239,19 @@
 	/* SATA Controller 1 IDE (ICH8) */
 	{ 0x8086, 0x2820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
 	/* SATA Controller 2 IDE (ICH8) */
-	{ 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+	{ 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
 	/* Mobile SATA Controller IDE (ICH8M) */
 	{ 0x8086, 0x2828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
 	/* SATA Controller IDE (ICH9) */
 	{ 0x8086, 0x2920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
 	/* SATA Controller IDE (ICH9) */
-	{ 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+	{ 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
 	/* SATA Controller IDE (ICH9) */
-	{ 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+	{ 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
 	/* SATA Controller IDE (ICH9M) */
-	{ 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+	{ 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
 	/* SATA Controller IDE (ICH9M) */
-	{ 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+	{ 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
 	/* SATA Controller IDE (ICH9M) */
 	{ 0x8086, 0x292e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
 	/* SATA Controller IDE (Tolapai) */
@@ -448,6 +449,18 @@
 	},
 };
 
+static const struct piix_map_db ich9_2port_map_db = {
+	.mask = 0x3,
+	.port_enable = 0x3,
+	.map = {
+		/* PM   PS   SM   SS       MAP */
+		{  P0,  NA,  P1,  NA }, /* 00b */
+		{  RV,  RV,  RV,  RV }, /* 01b */
+		{  RV,  RV,  RV,  RV }, /* 10b */
+		{  RV,  RV,  RV,  RV },
+	},
+};
+
 static const struct piix_map_db *piix_map_db_table[] = {
 	[ich5_sata]		= &ich5_map_db,
 	[ich6_sata]		= &ich6_map_db,
@@ -455,6 +468,7 @@
 	[ich6m_sata_ahci]	= &ich6m_map_db,
 	[ich8_sata_ahci]	= &ich8_map_db,
 	[tolapai_sata_ahci]	= &tolapai_map_db,
+	[ich9_2port_sata]	= &ich9_2port_map_db,
 };
 
 static struct ata_port_info piix_port_info[] = {
@@ -570,6 +584,17 @@
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &piix_sata_ops,
 	},
+
+	[ich9_2port_sata] =
+	{
+		.sht		= &piix_sht,
+		.flags		= PIIX_SATA_FLAGS | PIIX_FLAG_SCR |
+				  PIIX_FLAG_AHCI,
+		.pio_mask	= 0x1f,	/* pio0-4 */
+		.mwdma_mask	= 0x07, /* mwdma0-2 */
+		.udma_mask	= ATA_UDMA6,
+		.port_ops	= &piix_sata_ops,
+	},
 };
 
 static struct pci_bits piix_enable_bits[] = {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b05384a..68699b3 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3984,6 +3984,7 @@
 	{ "ST9120822AS",	"3.CLF",	ATA_HORKAGE_NONCQ, },
 	{ "ST9160821AS",	"3.CLF",	ATA_HORKAGE_NONCQ, },
 	{ "ST9160821AS",	"3.ALD",	ATA_HORKAGE_NONCQ, },
+	{ "ST9160821AS",	"3.CCD",	ATA_HORKAGE_NONCQ, },
 	{ "ST3160812AS",	"3.ADJ",	ATA_HORKAGE_NONCQ, },
 	{ "ST980813AS",		"3.ADB",	ATA_HORKAGE_NONCQ, },
 	{ "SAMSUNG HD401LJ",	"ZZ100-15",	ATA_HORKAGE_NONCQ, },
@@ -4013,8 +4014,14 @@
 	p = strchr(patt, wildchar);
 	if (p && ((*(p + 1)) == 0))
 		len = p - patt;
-	else
+	else {
 		len = strlen(name);
+		if (!len) {
+			if (!*patt)
+				return 0;
+			return -1;
+		}
+	}
 
 	return strncmp(patt, name, len);
 }
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index ea53e6a..d63c81e 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1363,6 +1363,7 @@
 static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
+	struct ata_eh_info *ehi = &qc->dev->link->eh_info;
 	struct scsi_cmnd *cmd = qc->scsicmd;
 	u8 *cdb = cmd->cmnd;
  	int need_sense = (qc->err_mask != 0);
@@ -1376,14 +1377,14 @@
 		case ATA_CMD_SET_FEATURES:
 			if ((qc->tf.feature == SETFEATURES_WC_ON) ||
 			    (qc->tf.feature == SETFEATURES_WC_OFF)) {
-				ap->link.eh_info.action |= ATA_EH_REVALIDATE;
+				ehi->action |= ATA_EH_REVALIDATE;
 				ata_port_schedule_eh(ap);
 			}
 			break;
 
 		case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
 		case ATA_CMD_SET_MULTI: /* multi_count changed */
-			ap->link.eh_info.action |= ATA_EH_REVALIDATE;
+			ehi->action |= ATA_EH_REVALIDATE;
 			ata_port_schedule_eh(ap);
 			break;
 		}
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
new file mode 100644
index 0000000..53070f6
--- /dev/null
+++ b/drivers/ata/pata_cs5536.c
@@ -0,0 +1,344 @@
+/*
+ * pata_cs5536.c	- CS5536 PATA for new ATA layer
+ *			  (C) 2007 Martin K. Petersen <mkp@mkp.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA
+ *
+ * Documentation:
+ *	Available from AMD web site.
+ *
+ * The IDE timing registers for the CS5536 live in the Geode Machine
+ * Specific Register file and not PCI config space.  Most BIOSes
+ * virtualize the PCI registers so the chip looks like a standard IDE
+ * controller.	Unfortunately not all implementations get this right.
+ * In particular some have problems with unaligned accesses to the
+ * virtualized PCI registers.  This driver always does full dword
+ * writes to work around the issue.  Also, in case of a bad BIOS this
+ * driver can be loaded with the "msr=1" parameter which forces using
+ * the Machine Specific Registers to configure the device.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/libata.h>
+#include <scsi/scsi_host.h>
+#include <asm/msr.h>
+
+#define DRV_NAME	"pata_cs5536"
+#define DRV_VERSION	"0.0.5"
+
+enum {
+	CFG			= 0,
+	DTC			= 1,
+	CAST			= 2,
+	ETC			= 3,
+
+	MSR_IDE_BASE		= 0x51300000,
+	MSR_IDE_CFG		= (MSR_IDE_BASE + 0x10),
+	MSR_IDE_DTC		= (MSR_IDE_BASE + 0x12),
+	MSR_IDE_CAST		= (MSR_IDE_BASE + 0x13),
+	MSR_IDE_ETC		= (MSR_IDE_BASE + 0x14),
+
+	PCI_IDE_CFG		= 0x40,
+	PCI_IDE_DTC		= 0x48,
+	PCI_IDE_CAST		= 0x4c,
+	PCI_IDE_ETC		= 0x50,
+
+	IDE_CFG_CHANEN		= 0x2,
+	IDE_CFG_CABLE		= 0x10000,
+
+	IDE_D0_SHIFT		= 24,
+	IDE_D1_SHIFT		= 16,
+	IDE_DRV_MASK		= 0xff,
+
+	IDE_CAST_D0_SHIFT	= 6,
+	IDE_CAST_D1_SHIFT	= 4,
+	IDE_CAST_DRV_MASK	= 0x3,
+	IDE_CAST_CMD_MASK	= 0xff,
+	IDE_CAST_CMD_SHIFT	= 24,
+
+	IDE_ETC_NODMA		= 0x03,
+};
+
+static int use_msr;
+
+static const u32 msr_reg[4] = {
+	MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC,
+};
+
+static const u8 pci_reg[4] = {
+	PCI_IDE_CFG, PCI_IDE_DTC, PCI_IDE_CAST, PCI_IDE_ETC,
+};
+
+static inline int cs5536_read(struct pci_dev *pdev, int reg, int *val)
+{
+	if (unlikely(use_msr)) {
+		u32 dummy;
+
+		rdmsr(msr_reg[reg], *val, dummy);
+		return 0;
+	}
+
+	return pci_read_config_dword(pdev, pci_reg[reg], val);
+}
+
+static inline int cs5536_write(struct pci_dev *pdev, int reg, int val)
+{
+	if (unlikely(use_msr)) {
+		wrmsr(msr_reg[reg], val, 0);
+		return 0;
+	}
+
+	return pci_write_config_dword(pdev, pci_reg[reg], val);
+}
+
+/**
+ *	cs5536_cable_detect	-	detect cable type
+ *	@ap: Port to detect on
+ *	@deadline: deadline jiffies for the operation
+ *
+ *	Perform cable detection for ATA66 capable cable. Return a libata
+ *	cable type.
+ */
+
+static int cs5536_cable_detect(struct ata_port *ap)
+{
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+	u32 cfg;
+
+	cs5536_read(pdev, CFG, &cfg);
+
+	if (cfg & (IDE_CFG_CABLE << ap->port_no))
+		return ATA_CBL_PATA80;
+	else
+		return ATA_CBL_PATA40;
+}
+
+/**
+ *	cs5536_set_piomode		-	PIO setup
+ *	@ap: ATA interface
+ *	@adev: device on the interface
+ */
+
+static void cs5536_set_piomode(struct ata_port *ap, struct ata_device *adev)
+{
+	static const u8 drv_timings[5] = {
+		0x98, 0x55, 0x32, 0x21, 0x20,
+	};
+
+	static const u8 addr_timings[5] = {
+		0x2, 0x1, 0x0, 0x0, 0x0,
+	};
+
+	static const u8 cmd_timings[5] = {
+		0x99, 0x92, 0x90, 0x22, 0x20,
+	};
+
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+	struct ata_device *pair = ata_dev_pair(adev);
+	int mode = adev->pio_mode - XFER_PIO_0;
+	int cmdmode = mode;
+	int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT;
+	int cshift = ap->port_no ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT;
+	u32 dtc, cast, etc;
+
+	if (pair)
+		cmdmode = min(mode, pair->pio_mode - XFER_PIO_0);
+
+	cs5536_read(pdev, DTC, &dtc);
+	cs5536_read(pdev, CAST, &cast);
+	cs5536_read(pdev, ETC, &etc);
+
+	dtc &= ~(IDE_DRV_MASK << dshift);
+	dtc |= drv_timings[mode] << dshift;
+
+	cast &= ~(IDE_CAST_DRV_MASK << cshift);
+	cast |= addr_timings[mode] << cshift;
+
+	cast &= ~(IDE_CAST_CMD_MASK << IDE_CAST_CMD_SHIFT);
+	cast |= cmd_timings[cmdmode] << IDE_CAST_CMD_SHIFT;
+
+	etc &= ~(IDE_DRV_MASK << dshift);
+	etc |= IDE_ETC_NODMA << dshift;
+
+	cs5536_write(pdev, DTC, dtc);
+	cs5536_write(pdev, CAST, cast);
+	cs5536_write(pdev, ETC, etc);
+}
+
+/**
+ *	cs5536_set_dmamode		-	DMA timing setup
+ *	@ap: ATA interface
+ *	@adev: Device being configured
+ *
+ */
+
+static void cs5536_set_dmamode(struct ata_port *ap, struct ata_device *adev)
+{
+	static const u8 udma_timings[6] = {
+		0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6,
+	};
+
+	static const u8 mwdma_timings[3] = {
+		0x67, 0x21, 0x20,
+	};
+
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+	u32 dtc, etc;
+	int mode = adev->dma_mode;
+	int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT;
+
+	if (mode >= XFER_UDMA_0) {
+		cs5536_read(pdev, ETC, &etc);
+
+		etc &= ~(IDE_DRV_MASK << dshift);
+		etc |= udma_timings[mode - XFER_UDMA_0] << dshift;
+
+		cs5536_write(pdev, ETC, etc);
+	} else { /* MWDMA */
+		cs5536_read(pdev, DTC, &dtc);
+
+		dtc &= ~(IDE_DRV_MASK << dshift);
+		dtc |= mwdma_timings[mode] << dshift;
+
+		cs5536_write(pdev, DTC, dtc);
+	}
+}
+
+static struct scsi_host_template cs5536_sht = {
+	.module			= THIS_MODULE,
+	.name			= DRV_NAME,
+	.ioctl			= ata_scsi_ioctl,
+	.queuecommand		= ata_scsi_queuecmd,
+	.can_queue		= ATA_DEF_QUEUE,
+	.this_id		= ATA_SHT_THIS_ID,
+	.sg_tablesize		= LIBATA_MAX_PRD,
+	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,
+	.emulated		= ATA_SHT_EMULATED,
+	.use_clustering		= ATA_SHT_USE_CLUSTERING,
+	.proc_name		= DRV_NAME,
+	.dma_boundary		= ATA_DMA_BOUNDARY,
+	.slave_configure	= ata_scsi_slave_config,
+	.slave_destroy		= ata_scsi_slave_destroy,
+	.bios_param		= ata_std_bios_param,
+};
+
+static struct ata_port_operations cs5536_port_ops = {
+	.set_piomode		= cs5536_set_piomode,
+	.set_dmamode		= cs5536_set_dmamode,
+	.mode_filter		= ata_pci_default_filter,
+
+	.tf_load		= ata_tf_load,
+	.tf_read		= ata_tf_read,
+	.check_status		= ata_check_status,
+	.exec_command		= ata_exec_command,
+	.dev_select		= ata_std_dev_select,
+
+	.freeze			= ata_bmdma_freeze,
+	.thaw			= ata_bmdma_thaw,
+	.error_handler		= ata_bmdma_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
+	.cable_detect		= cs5536_cable_detect,
+
+	.bmdma_setup		= ata_bmdma_setup,
+	.bmdma_start		= ata_bmdma_start,
+	.bmdma_stop		= ata_bmdma_stop,
+	.bmdma_status		= ata_bmdma_status,
+
+	.qc_prep		= ata_qc_prep,
+	.qc_issue		= ata_qc_issue_prot,
+
+	.data_xfer		= ata_data_xfer,
+
+	.irq_handler		= ata_interrupt,
+	.irq_clear		= ata_bmdma_irq_clear,
+	.irq_on			= ata_irq_on,
+
+	.port_start		= ata_port_start,
+};
+
+/**
+ *	cs5536_init_one
+ *	@dev: PCI device
+ *	@id: Entry in match table
+ *
+ */
+
+static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	static const struct ata_port_info info = {
+		.sht = &cs5536_sht,
+		.flags = ATA_FLAG_SLAVE_POSS,
+		.pio_mask = 0x1f,
+		.mwdma_mask = 0x07,
+		.udma_mask = ATA_UDMA5,
+		.port_ops = &cs5536_port_ops,
+	};
+
+	const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info };
+	u32 cfg;
+
+	if (use_msr)
+		printk(KERN_ERR DRV_NAME ": Using MSR regs instead of PCI\n");
+
+	cs5536_read(dev, CFG, &cfg);
+
+	if ((cfg & IDE_CFG_CHANEN) == 0) {
+		printk(KERN_ERR DRV_NAME ": disabled by BIOS\n");
+		return -ENODEV;
+	}
+
+	return ata_pci_init_one(dev, ppi);
+}
+
+static const struct pci_device_id cs5536[] = {
+	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_CS5536_IDE), },
+	{ },
+};
+
+static struct pci_driver cs5536_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= cs5536,
+	.probe		= cs5536_init_one,
+	.remove		= ata_pci_remove_one,
+#ifdef CONFIG_PM
+	.suspend	= ata_pci_device_suspend,
+	.resume		= ata_pci_device_resume,
+#endif
+};
+
+static int __init cs5536_init(void)
+{
+	return pci_register_driver(&cs5536_pci_driver);
+}
+
+static void __exit cs5536_exit(void)
+{
+	pci_unregister_driver(&cs5536_pci_driver);
+}
+
+MODULE_AUTHOR("Martin K. Petersen");
+MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cs5536);
+MODULE_VERSION(DRV_VERSION);
+module_param_named(msr, use_msr, int, 0644);
+MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
+
+module_init(cs5536_init);
+module_exit(cs5536_exit);
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 782ff4a..5db2013 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -353,6 +353,7 @@
 
 static struct pcmcia_device_id pcmcia_devices[] = {
 	PCMCIA_DEVICE_FUNC_ID(4),
+	PCMCIA_DEVICE_MANF_CARD(0x0000, 0x0000),	/* Corsair */
 	PCMCIA_DEVICE_MANF_CARD(0x0007, 0x0000),	/* Hitachi */
 	PCMCIA_DEVICE_MANF_CARD(0x000a, 0x0000),	/* I-O Data CFA */
 	PCMCIA_DEVICE_MANF_CARD(0x001c, 0x0001),	/* Mitsubishi CFA */
@@ -378,6 +379,7 @@
 	PCMCIA_DEVICE_PROD_ID12("EXP   ", "CD-ROM", 0x0a5c52fd, 0x66536591),
 	PCMCIA_DEVICE_PROD_ID12("EXP   ", "PnPIDE", 0x0a5c52fd, 0x0c694728),
 	PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e),
+	PCMCIA_DEVICE_PROD_ID12("Hyperstone", "Model1", 0x3d5b9ef5, 0xca6ab420),
 	PCMCIA_DEVICE_PROD_ID12("HITACHI", "FLASH", 0xf4f43949, 0x9eb86aae),
 	PCMCIA_DEVICE_PROD_ID12("HITACHI", "microdrive", 0xf4f43949, 0xa6d76178),
 	PCMCIA_DEVICE_PROD_ID12("IBM", "microdrive", 0xb569a6e5, 0xa6d76178),
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 2eb75cd..4dc2e73 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -279,7 +279,7 @@
  *	Returns the final clock settings.
  */
 
-static u8 sil680_init_chip(struct pci_dev *pdev)
+static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio)
 {
 	u32 class_rev	= 0;
 	u8 tmpbyte	= 0;
@@ -297,6 +297,8 @@
 	dev_dbg(&pdev->dev, "sil680: BA5_EN = %d clock = %02X\n",
 		tmpbyte & 1, tmpbyte & 0x30);
 
+	*try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5);
+
 	switch(tmpbyte & 0x30) {
 		case 0x00:
 			/* 133 clock attempt to force it on */
@@ -361,25 +363,76 @@
 	};
 	const struct ata_port_info *ppi[] = { &info, NULL };
 	static int printed_version;
+	struct ata_host *host;
+	void __iomem *mmio_base;
+	int rc, try_mmio;
 
 	if (!printed_version++)
 		dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n");
 
-	switch(sil680_init_chip(pdev))
-	{
+	switch (sil680_init_chip(pdev, &try_mmio)) {
 		case 0:
 			ppi[0] = &info_slow;
 			break;
 		case 0x30:
 			return -ENODEV;
 	}
+
+	if (!try_mmio)
+		goto use_ioports;
+
+	/* Try to acquire MMIO resources and fallback to PIO if
+	 * that fails
+	 */
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+	rc = pcim_iomap_regions(pdev, 1 << SIL680_MMIO_BAR, DRV_NAME);
+	if (rc)
+		goto use_ioports;
+
+	/* Allocate host and set it up */
+	host = ata_host_alloc_pinfo(&pdev->dev, ppi, 2);
+	if (!host)
+		return -ENOMEM;
+	host->iomap = pcim_iomap_table(pdev);
+
+	/* Setup DMA masks */
+	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	if (rc)
+		return rc;
+	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	if (rc)
+		return rc;
+	pci_set_master(pdev);
+
+	/* Get MMIO base and initialize port addresses */
+	mmio_base = host->iomap[SIL680_MMIO_BAR];
+	host->ports[0]->ioaddr.bmdma_addr = mmio_base + 0x00;
+	host->ports[0]->ioaddr.cmd_addr = mmio_base + 0x80;
+	host->ports[0]->ioaddr.ctl_addr = mmio_base + 0x8a;
+	host->ports[0]->ioaddr.altstatus_addr = mmio_base + 0x8a;
+	ata_std_ports(&host->ports[0]->ioaddr);
+	host->ports[1]->ioaddr.bmdma_addr = mmio_base + 0x08;
+	host->ports[1]->ioaddr.cmd_addr = mmio_base + 0xc0;
+	host->ports[1]->ioaddr.ctl_addr = mmio_base + 0xca;
+	host->ports[1]->ioaddr.altstatus_addr = mmio_base + 0xca;
+	ata_std_ports(&host->ports[1]->ioaddr);
+
+	/* Register & activate */
+	return ata_host_activate(host, pdev->irq, ata_interrupt, IRQF_SHARED,
+				 &sil680_sht);
+
+use_ioports:
 	return ata_pci_init_one(pdev, ppi);
 }
 
 #ifdef CONFIG_PM
 static int sil680_reinit_one(struct pci_dev *pdev)
 {
-	sil680_init_chip(pdev);
+	int try_mmio;
+
+	sil680_init_chip(pdev, &try_mmio);
 	return ata_pci_device_resume(pdev);
 }
 #endif
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 40557fe2..240a892 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -169,6 +169,35 @@
 	NV_ADMA_PORT_REGISTER_MODE	= (1 << 0),
 	NV_ADMA_ATAPI_SETUP_COMPLETE	= (1 << 1),
 
+	/* MCP55 reg offset */
+	NV_CTL_MCP55			= 0x400,
+	NV_INT_STATUS_MCP55		= 0x440,
+	NV_INT_ENABLE_MCP55		= 0x444,
+	NV_NCQ_REG_MCP55		= 0x448,
+
+	/* MCP55 */
+	NV_INT_ALL_MCP55		= 0xffff,
+	NV_INT_PORT_SHIFT_MCP55		= 16,	/* each port occupies 16 bits */
+	NV_INT_MASK_MCP55		= NV_INT_ALL_MCP55 & 0xfffd,
+
+	/* SWNCQ ENABLE BITS*/
+	NV_CTL_PRI_SWNCQ		= 0x02,
+	NV_CTL_SEC_SWNCQ		= 0x04,
+
+	/* SW NCQ status bits*/
+	NV_SWNCQ_IRQ_DEV		= (1 << 0),
+	NV_SWNCQ_IRQ_PM			= (1 << 1),
+	NV_SWNCQ_IRQ_ADDED		= (1 << 2),
+	NV_SWNCQ_IRQ_REMOVED		= (1 << 3),
+
+	NV_SWNCQ_IRQ_BACKOUT		= (1 << 4),
+	NV_SWNCQ_IRQ_SDBFIS		= (1 << 5),
+	NV_SWNCQ_IRQ_DHREGFIS		= (1 << 6),
+	NV_SWNCQ_IRQ_DMASETUP		= (1 << 7),
+
+	NV_SWNCQ_IRQ_HOTPLUG		= NV_SWNCQ_IRQ_ADDED |
+					  NV_SWNCQ_IRQ_REMOVED,
+
 };
 
 /* ADMA Physical Region Descriptor - one SG segment */
@@ -226,6 +255,42 @@
 	unsigned long		type;
 };
 
+struct defer_queue {
+	u32		defer_bits;
+	unsigned int	head;
+	unsigned int	tail;
+	unsigned int	tag[ATA_MAX_QUEUE];
+};
+
+enum ncq_saw_flag_list {
+	ncq_saw_d2h	= (1U << 0),
+	ncq_saw_dmas	= (1U << 1),
+	ncq_saw_sdb	= (1U << 2),
+	ncq_saw_backout	= (1U << 3),
+};
+
+struct nv_swncq_port_priv {
+	struct ata_prd	*prd;	 /* our SG list */
+	dma_addr_t	prd_dma; /* and its DMA mapping */
+	void __iomem	*sactive_block;
+	void __iomem	*irq_block;
+	void __iomem	*tag_block;
+	u32		qc_active;
+
+	unsigned int	last_issue_tag;
+
+	/* fifo circular queue to store deferral command */
+	struct defer_queue defer_queue;
+
+	/* for NCQ interrupt analysis */
+	u32		dhfis_bits;
+	u32		dmafis_bits;
+	u32		sdbfis_bits;
+
+	unsigned int	ncq_flags;
+};
+
+
 #define NV_ADMA_CHECK_INTR(GCTL, PORT) ((GCTL) & ( 1 << (19 + (12 * (PORT)))))
 
 static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
@@ -263,13 +328,29 @@
 static void nv_adma_post_internal_cmd(struct ata_queued_cmd *qc);
 static void nv_adma_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
 
+static void nv_mcp55_thaw(struct ata_port *ap);
+static void nv_mcp55_freeze(struct ata_port *ap);
+static void nv_swncq_error_handler(struct ata_port *ap);
+static int nv_swncq_slave_config(struct scsi_device *sdev);
+static int nv_swncq_port_start(struct ata_port *ap);
+static void nv_swncq_qc_prep(struct ata_queued_cmd *qc);
+static void nv_swncq_fill_sg(struct ata_queued_cmd *qc);
+static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc);
+static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis);
+static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance);
+#ifdef CONFIG_PM
+static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg);
+static int nv_swncq_port_resume(struct ata_port *ap);
+#endif
+
 enum nv_host_type
 {
 	GENERIC,
 	NFORCE2,
 	NFORCE3 = NFORCE2,	/* NF2 == NF3 as far as sata_nv is concerned */
 	CK804,
-	ADMA
+	ADMA,
+	SWNCQ,
 };
 
 static const struct pci_device_id nv_pci_tbl[] = {
@@ -280,13 +361,13 @@
 	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2), CK804 },
 	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA), CK804 },
 	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2), CK804 },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), GENERIC },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), GENERIC },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), GENERIC },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), GENERIC },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), GENERIC },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), GENERIC },
-	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), GENERIC },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), SWNCQ },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), SWNCQ },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), SWNCQ },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), SWNCQ },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), SWNCQ },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), SWNCQ },
+	{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), SWNCQ },
 
 	{ } /* terminate list */
 };
@@ -339,6 +420,25 @@
 	.bios_param		= ata_std_bios_param,
 };
 
+static struct scsi_host_template nv_swncq_sht = {
+	.module			= THIS_MODULE,
+	.name			= DRV_NAME,
+	.ioctl			= ata_scsi_ioctl,
+	.queuecommand		= ata_scsi_queuecmd,
+	.change_queue_depth	= ata_scsi_change_queue_depth,
+	.can_queue		= ATA_MAX_QUEUE,
+	.this_id		= ATA_SHT_THIS_ID,
+	.sg_tablesize		= LIBATA_MAX_PRD,
+	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,
+	.emulated		= ATA_SHT_EMULATED,
+	.use_clustering		= ATA_SHT_USE_CLUSTERING,
+	.proc_name		= DRV_NAME,
+	.dma_boundary		= ATA_DMA_BOUNDARY,
+	.slave_configure	= nv_swncq_slave_config,
+	.slave_destroy		= ata_scsi_slave_destroy,
+	.bios_param		= ata_std_bios_param,
+};
+
 static const struct ata_port_operations nv_generic_ops = {
 	.tf_load		= ata_tf_load,
 	.tf_read		= ata_tf_read,
@@ -444,6 +544,35 @@
 	.host_stop		= nv_adma_host_stop,
 };
 
+static const struct ata_port_operations nv_swncq_ops = {
+	.tf_load		= ata_tf_load,
+	.tf_read		= ata_tf_read,
+	.exec_command		= ata_exec_command,
+	.check_status		= ata_check_status,
+	.dev_select		= ata_std_dev_select,
+	.bmdma_setup		= ata_bmdma_setup,
+	.bmdma_start		= ata_bmdma_start,
+	.bmdma_stop		= ata_bmdma_stop,
+	.bmdma_status		= ata_bmdma_status,
+	.qc_defer		= ata_std_qc_defer,
+	.qc_prep		= nv_swncq_qc_prep,
+	.qc_issue		= nv_swncq_qc_issue,
+	.freeze			= nv_mcp55_freeze,
+	.thaw			= nv_mcp55_thaw,
+	.error_handler		= nv_swncq_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
+	.data_xfer		= ata_data_xfer,
+	.irq_clear		= ata_bmdma_irq_clear,
+	.irq_on			= ata_irq_on,
+	.scr_read		= nv_scr_read,
+	.scr_write		= nv_scr_write,
+#ifdef CONFIG_PM
+	.port_suspend		= nv_swncq_port_suspend,
+	.port_resume		= nv_swncq_port_resume,
+#endif
+	.port_start		= nv_swncq_port_start,
+};
+
 static const struct ata_port_info nv_port_info[] = {
 	/* generic */
 	{
@@ -490,6 +619,18 @@
 		.port_ops	= &nv_adma_ops,
 		.irq_handler	= nv_adma_interrupt,
 	},
+	/* SWNCQ */
+	{
+		.sht		= &nv_swncq_sht,
+		.flags	        = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+				  ATA_FLAG_NCQ,
+		.link_flags	= ATA_LFLAG_HRST_TO_RESUME,
+		.pio_mask	= NV_PIO_MASK,
+		.mwdma_mask	= NV_MWDMA_MASK,
+		.udma_mask	= NV_UDMA_MASK,
+		.port_ops	= &nv_swncq_ops,
+		.irq_handler	= nv_swncq_interrupt,
+	},
 };
 
 MODULE_AUTHOR("NVIDIA");
@@ -499,6 +640,7 @@
 MODULE_VERSION(DRV_VERSION);
 
 static int adma_enabled = 1;
+static int swncq_enabled;
 
 static void nv_adma_register_mode(struct ata_port *ap)
 {
@@ -1452,6 +1594,34 @@
 	writeb(mask, mmio_base + NV_INT_ENABLE_CK804);
 }
 
+static void nv_mcp55_freeze(struct ata_port *ap)
+{
+	void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR];
+	int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55;
+	u32 mask;
+
+	writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55);
+
+	mask = readl(mmio_base + NV_INT_ENABLE_MCP55);
+	mask &= ~(NV_INT_ALL_MCP55 << shift);
+	writel(mask, mmio_base + NV_INT_ENABLE_MCP55);
+	ata_bmdma_freeze(ap);
+}
+
+static void nv_mcp55_thaw(struct ata_port *ap)
+{
+	void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR];
+	int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55;
+	u32 mask;
+
+	writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55);
+
+	mask = readl(mmio_base + NV_INT_ENABLE_MCP55);
+	mask |= (NV_INT_MASK_MCP55 << shift);
+	writel(mask, mmio_base + NV_INT_ENABLE_MCP55);
+	ata_bmdma_thaw(ap);
+}
+
 static int nv_hardreset(struct ata_link *link, unsigned int *class,
 			unsigned long deadline)
 {
@@ -1525,6 +1695,663 @@
 			   nv_hardreset, ata_std_postreset);
 }
 
+static void nv_swncq_qc_to_dq(struct ata_port *ap, struct ata_queued_cmd *qc)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	struct defer_queue *dq = &pp->defer_queue;
+
+	/* queue is full */
+	WARN_ON(dq->tail - dq->head == ATA_MAX_QUEUE);
+	dq->defer_bits |= (1 << qc->tag);
+	dq->tag[dq->tail++ & (ATA_MAX_QUEUE - 1)] = qc->tag;
+}
+
+static struct ata_queued_cmd *nv_swncq_qc_from_dq(struct ata_port *ap)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	struct defer_queue *dq = &pp->defer_queue;
+	unsigned int tag;
+
+	if (dq->head == dq->tail)	/* null queue */
+		return NULL;
+
+	tag = dq->tag[dq->head & (ATA_MAX_QUEUE - 1)];
+	dq->tag[dq->head++ & (ATA_MAX_QUEUE - 1)] = ATA_TAG_POISON;
+	WARN_ON(!(dq->defer_bits & (1 << tag)));
+	dq->defer_bits &= ~(1 << tag);
+
+	return ata_qc_from_tag(ap, tag);
+}
+
+static void nv_swncq_fis_reinit(struct ata_port *ap)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+
+	pp->dhfis_bits = 0;
+	pp->dmafis_bits = 0;
+	pp->sdbfis_bits = 0;
+	pp->ncq_flags = 0;
+}
+
+static void nv_swncq_pp_reinit(struct ata_port *ap)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	struct defer_queue *dq = &pp->defer_queue;
+
+	dq->head = 0;
+	dq->tail = 0;
+	dq->defer_bits = 0;
+	pp->qc_active = 0;
+	pp->last_issue_tag = ATA_TAG_POISON;
+	nv_swncq_fis_reinit(ap);
+}
+
+static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+
+	writew(fis, pp->irq_block);
+}
+
+static void __ata_bmdma_stop(struct ata_port *ap)
+{
+	struct ata_queued_cmd qc;
+
+	qc.ap = ap;
+	ata_bmdma_stop(&qc);
+}
+
+static void nv_swncq_ncq_stop(struct ata_port *ap)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	unsigned int i;
+	u32 sactive;
+	u32 done_mask;
+
+	ata_port_printk(ap, KERN_ERR,
+			"EH in SWNCQ mode,QC:qc_active 0x%X sactive 0x%X\n",
+			ap->qc_active, ap->link.sactive);
+	ata_port_printk(ap, KERN_ERR,
+		"SWNCQ:qc_active 0x%X defer_bits 0x%X last_issue_tag 0x%x\n  "
+		"dhfis 0x%X dmafis 0x%X sdbfis 0x%X\n",
+		pp->qc_active, pp->defer_queue.defer_bits, pp->last_issue_tag,
+		pp->dhfis_bits, pp->dmafis_bits, pp->sdbfis_bits);
+
+	ata_port_printk(ap, KERN_ERR, "ATA_REG 0x%X ERR_REG 0x%X\n",
+			ap->ops->check_status(ap),
+			ioread8(ap->ioaddr.error_addr));
+
+	sactive = readl(pp->sactive_block);
+	done_mask = pp->qc_active ^ sactive;
+
+	ata_port_printk(ap, KERN_ERR, "tag : dhfis dmafis sdbfis sacitve\n");
+	for (i = 0; i < ATA_MAX_QUEUE; i++) {
+		u8 err = 0;
+		if (pp->qc_active & (1 << i))
+			err = 0;
+		else if (done_mask & (1 << i))
+			err = 1;
+		else
+			continue;
+
+		ata_port_printk(ap, KERN_ERR,
+				"tag 0x%x: %01x %01x %01x %01x %s\n", i,
+				(pp->dhfis_bits >> i) & 0x1,
+				(pp->dmafis_bits >> i) & 0x1,
+				(pp->sdbfis_bits >> i) & 0x1,
+				(sactive >> i) & 0x1,
+				(err ? "error! tag doesn't exit" : " "));
+	}
+
+	nv_swncq_pp_reinit(ap);
+	ap->ops->irq_clear(ap);
+	__ata_bmdma_stop(ap);
+	nv_swncq_irq_clear(ap, 0xffff);
+}
+
+static void nv_swncq_error_handler(struct ata_port *ap)
+{
+	struct ata_eh_context *ehc = &ap->link.eh_context;
+
+	if (ap->link.sactive) {
+		nv_swncq_ncq_stop(ap);
+		ehc->i.action |= ATA_EH_HARDRESET;
+	}
+
+	ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset,
+			   nv_hardreset, ata_std_postreset);
+}
+
+#ifdef CONFIG_PM
+static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg)
+{
+	void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
+	u32 tmp;
+
+	/* clear irq */
+	writel(~0, mmio + NV_INT_STATUS_MCP55);
+
+	/* disable irq */
+	writel(0, mmio + NV_INT_ENABLE_MCP55);
+
+	/* disable swncq */
+	tmp = readl(mmio + NV_CTL_MCP55);
+	tmp &= ~(NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ);
+	writel(tmp, mmio + NV_CTL_MCP55);
+
+	return 0;
+}
+
+static int nv_swncq_port_resume(struct ata_port *ap)
+{
+	void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
+	u32 tmp;
+
+	/* clear irq */
+	writel(~0, mmio + NV_INT_STATUS_MCP55);
+
+	/* enable irq */
+	writel(0x00fd00fd, mmio + NV_INT_ENABLE_MCP55);
+
+	/* enable swncq */
+	tmp = readl(mmio + NV_CTL_MCP55);
+	writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55);
+
+	return 0;
+}
+#endif
+
+static void nv_swncq_host_init(struct ata_host *host)
+{
+	u32 tmp;
+	void __iomem *mmio = host->iomap[NV_MMIO_BAR];
+	struct pci_dev *pdev = to_pci_dev(host->dev);
+	u8 regval;
+
+	/* disable  ECO 398 */
+	pci_read_config_byte(pdev, 0x7f, &regval);
+	regval &= ~(1 << 7);
+	pci_write_config_byte(pdev, 0x7f, regval);
+
+	/* enable swncq */
+	tmp = readl(mmio + NV_CTL_MCP55);
+	VPRINTK("HOST_CTL:0x%X\n", tmp);
+	writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55);
+
+	/* enable irq intr */
+	tmp = readl(mmio + NV_INT_ENABLE_MCP55);
+	VPRINTK("HOST_ENABLE:0x%X\n", tmp);
+	writel(tmp | 0x00fd00fd, mmio + NV_INT_ENABLE_MCP55);
+
+	/*  clear port irq */
+	writel(~0x0, mmio + NV_INT_STATUS_MCP55);
+}
+
+static int nv_swncq_slave_config(struct scsi_device *sdev)
+{
+	struct ata_port *ap = ata_shost_to_port(sdev->host);
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+	struct ata_device *dev;
+	int rc;
+	u8 rev;
+	u8 check_maxtor = 0;
+	unsigned char model_num[ATA_ID_PROD_LEN + 1];
+
+	rc = ata_scsi_slave_config(sdev);
+	if (sdev->id >= ATA_MAX_DEVICES || sdev->channel || sdev->lun)
+		/* Not a proper libata device, ignore */
+		return rc;
+
+	dev = &ap->link.device[sdev->id];
+	if (!(ap->flags & ATA_FLAG_NCQ) || dev->class == ATA_DEV_ATAPI)
+		return rc;
+
+	/* if MCP51 and Maxtor, then disable ncq */
+	if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA ||
+		pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2)
+		check_maxtor = 1;
+
+	/* if MCP55 and rev <= a2 and Maxtor, then disable ncq */
+	if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA ||
+		pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2) {
+		pci_read_config_byte(pdev, 0x8, &rev);
+		if (rev <= 0xa2)
+			check_maxtor = 1;
+	}
+
+	if (!check_maxtor)
+		return rc;
+
+	ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
+
+	if (strncmp(model_num, "Maxtor", 6) == 0) {
+		ata_scsi_change_queue_depth(sdev, 1);
+		ata_dev_printk(dev, KERN_NOTICE,
+			"Disabling SWNCQ mode (depth %x)\n", sdev->queue_depth);
+	}
+
+	return rc;
+}
+
+static int nv_swncq_port_start(struct ata_port *ap)
+{
+	struct device *dev = ap->host->dev;
+	void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
+	struct nv_swncq_port_priv *pp;
+	int rc;
+
+	rc = ata_port_start(ap);
+	if (rc)
+		return rc;
+
+	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
+	if (!pp)
+		return -ENOMEM;
+
+	pp->prd = dmam_alloc_coherent(dev, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE,
+				      &pp->prd_dma, GFP_KERNEL);
+	if (!pp->prd)
+		return -ENOMEM;
+	memset(pp->prd, 0, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE);
+
+	ap->private_data = pp;
+	pp->sactive_block = ap->ioaddr.scr_addr + 4 * SCR_ACTIVE;
+	pp->irq_block = mmio + NV_INT_STATUS_MCP55 + ap->port_no * 2;
+	pp->tag_block = mmio + NV_NCQ_REG_MCP55 + ap->port_no * 2;
+
+	return 0;
+}
+
+static void nv_swncq_qc_prep(struct ata_queued_cmd *qc)
+{
+	if (qc->tf.protocol != ATA_PROT_NCQ) {
+		ata_qc_prep(qc);
+		return;
+	}
+
+	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+		return;
+
+	nv_swncq_fill_sg(qc);
+}
+
+static void nv_swncq_fill_sg(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct scatterlist *sg;
+	unsigned int idx;
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	struct ata_prd *prd;
+
+	WARN_ON(qc->__sg == NULL);
+	WARN_ON(qc->n_elem == 0 && qc->pad_len == 0);
+
+	prd = pp->prd + ATA_MAX_PRD * qc->tag;
+
+	idx = 0;
+	ata_for_each_sg(sg, qc) {
+		u32 addr, offset;
+		u32 sg_len, len;
+
+		addr = (u32)sg_dma_address(sg);
+		sg_len = sg_dma_len(sg);
+
+		while (sg_len) {
+			offset = addr & 0xffff;
+			len = sg_len;
+			if ((offset + sg_len) > 0x10000)
+				len = 0x10000 - offset;
+
+			prd[idx].addr = cpu_to_le32(addr);
+			prd[idx].flags_len = cpu_to_le32(len & 0xffff);
+
+			idx++;
+			sg_len -= len;
+			addr += len;
+		}
+	}
+
+	if (idx)
+		prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+}
+
+static unsigned int nv_swncq_issue_atacmd(struct ata_port *ap,
+					  struct ata_queued_cmd *qc)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+
+	if (qc == NULL)
+		return 0;
+
+	DPRINTK("Enter\n");
+
+	writel((1 << qc->tag), pp->sactive_block);
+	pp->last_issue_tag = qc->tag;
+	pp->dhfis_bits &= ~(1 << qc->tag);
+	pp->dmafis_bits &= ~(1 << qc->tag);
+	pp->qc_active |= (0x1 << qc->tag);
+
+	ap->ops->tf_load(ap, &qc->tf);	 /* load tf registers */
+	ap->ops->exec_command(ap, &qc->tf);
+
+	DPRINTK("Issued tag %u\n", qc->tag);
+
+	return 0;
+}
+
+static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct nv_swncq_port_priv *pp = ap->private_data;
+
+	if (qc->tf.protocol != ATA_PROT_NCQ)
+		return ata_qc_issue_prot(qc);
+
+	DPRINTK("Enter\n");
+
+	if (!pp->qc_active)
+		nv_swncq_issue_atacmd(ap, qc);
+	else
+		nv_swncq_qc_to_dq(ap, qc);	/* add qc to defer queue */
+
+	return 0;
+}
+
+static void nv_swncq_hotplug(struct ata_port *ap, u32 fis)
+{
+	u32 serror;
+	struct ata_eh_info *ehi = &ap->link.eh_info;
+
+	ata_ehi_clear_desc(ehi);
+
+	/* AHCI needs SError cleared; otherwise, it might lock up */
+	sata_scr_read(&ap->link, SCR_ERROR, &serror);
+	sata_scr_write(&ap->link, SCR_ERROR, serror);
+
+	/* analyze @irq_stat */
+	if (fis & NV_SWNCQ_IRQ_ADDED)
+		ata_ehi_push_desc(ehi, "hot plug");
+	else if (fis & NV_SWNCQ_IRQ_REMOVED)
+		ata_ehi_push_desc(ehi, "hot unplug");
+
+	ata_ehi_hotplugged(ehi);
+
+	/* okay, let's hand over to EH */
+	ehi->serror |= serror;
+
+	ata_port_freeze(ap);
+}
+
+static int nv_swncq_sdbfis(struct ata_port *ap)
+{
+	struct ata_queued_cmd *qc;
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	struct ata_eh_info *ehi = &ap->link.eh_info;
+	u32 sactive;
+	int nr_done = 0;
+	u32 done_mask;
+	int i;
+	u8 host_stat;
+	u8 lack_dhfis = 0;
+
+	host_stat = ap->ops->bmdma_status(ap);
+	if (unlikely(host_stat & ATA_DMA_ERR)) {
+		/* error when transfering data to/from memory */
+		ata_ehi_clear_desc(ehi);
+		ata_ehi_push_desc(ehi, "BMDMA stat 0x%x", host_stat);
+		ehi->err_mask |= AC_ERR_HOST_BUS;
+		ehi->action |= ATA_EH_SOFTRESET;
+		return -EINVAL;
+	}
+
+	ap->ops->irq_clear(ap);
+	__ata_bmdma_stop(ap);
+
+	sactive = readl(pp->sactive_block);
+	done_mask = pp->qc_active ^ sactive;
+
+	if (unlikely(done_mask & sactive)) {
+		ata_ehi_clear_desc(ehi);
+		ata_ehi_push_desc(ehi, "illegal SWNCQ:qc_active transition"
+				  "(%08x->%08x)", pp->qc_active, sactive);
+		ehi->err_mask |= AC_ERR_HSM;
+		ehi->action |= ATA_EH_HARDRESET;
+		return -EINVAL;
+	}
+	for (i = 0; i < ATA_MAX_QUEUE; i++) {
+		if (!(done_mask & (1 << i)))
+			continue;
+
+		qc = ata_qc_from_tag(ap, i);
+		if (qc) {
+			ata_qc_complete(qc);
+			pp->qc_active &= ~(1 << i);
+			pp->dhfis_bits &= ~(1 << i);
+			pp->dmafis_bits &= ~(1 << i);
+			pp->sdbfis_bits |= (1 << i);
+			nr_done++;
+		}
+	}
+
+	if (!ap->qc_active) {
+		DPRINTK("over\n");
+		nv_swncq_pp_reinit(ap);
+		return nr_done;
+	}
+
+	if (pp->qc_active & pp->dhfis_bits)
+		return nr_done;
+
+	if ((pp->ncq_flags & ncq_saw_backout) ||
+	    (pp->qc_active ^ pp->dhfis_bits))
+		/* if the controller cann't get a device to host register FIS,
+		 * The driver needs to reissue the new command.
+		 */
+		lack_dhfis = 1;
+
+	DPRINTK("id 0x%x QC: qc_active 0x%x,"
+		"SWNCQ:qc_active 0x%X defer_bits %X "
+		"dhfis 0x%X dmafis 0x%X last_issue_tag %x\n",
+		ap->print_id, ap->qc_active, pp->qc_active,
+		pp->defer_queue.defer_bits, pp->dhfis_bits,
+		pp->dmafis_bits, pp->last_issue_tag);
+
+	nv_swncq_fis_reinit(ap);
+
+	if (lack_dhfis) {
+		qc = ata_qc_from_tag(ap, pp->last_issue_tag);
+		nv_swncq_issue_atacmd(ap, qc);
+		return nr_done;
+	}
+
+	if (pp->defer_queue.defer_bits) {
+		/* send deferral queue command */
+		qc = nv_swncq_qc_from_dq(ap);
+		WARN_ON(qc == NULL);
+		nv_swncq_issue_atacmd(ap, qc);
+	}
+
+	return nr_done;
+}
+
+static inline u32 nv_swncq_tag(struct ata_port *ap)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	u32 tag;
+
+	tag = readb(pp->tag_block) >> 2;
+	return (tag & 0x1f);
+}
+
+static int nv_swncq_dmafis(struct ata_port *ap)
+{
+	struct ata_queued_cmd *qc;
+	unsigned int rw;
+	u8 dmactl;
+	u32 tag;
+	struct nv_swncq_port_priv *pp = ap->private_data;
+
+	__ata_bmdma_stop(ap);
+	tag = nv_swncq_tag(ap);
+
+	DPRINTK("dma setup tag 0x%x\n", tag);
+	qc = ata_qc_from_tag(ap, tag);
+
+	if (unlikely(!qc))
+		return 0;
+
+	rw = qc->tf.flags & ATA_TFLAG_WRITE;
+
+	/* load PRD table addr. */
+	iowrite32(pp->prd_dma + ATA_PRD_TBL_SZ * qc->tag,
+		  ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
+
+	/* specify data direction, triple-check start bit is clear */
+	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+	dmactl &= ~ATA_DMA_WR;
+	if (!rw)
+		dmactl |= ATA_DMA_WR;
+
+	iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+
+	return 1;
+}
+
+static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis)
+{
+	struct nv_swncq_port_priv *pp = ap->private_data;
+	struct ata_queued_cmd *qc;
+	struct ata_eh_info *ehi = &ap->link.eh_info;
+	u32 serror;
+	u8 ata_stat;
+	int rc = 0;
+
+	ata_stat = ap->ops->check_status(ap);
+	nv_swncq_irq_clear(ap, fis);
+	if (!fis)
+		return;
+
+	if (ap->pflags & ATA_PFLAG_FROZEN)
+		return;
+
+	if (fis & NV_SWNCQ_IRQ_HOTPLUG) {
+		nv_swncq_hotplug(ap, fis);
+		return;
+	}
+
+	if (!pp->qc_active)
+		return;
+
+	if (ap->ops->scr_read(ap, SCR_ERROR, &serror))
+		return;
+	ap->ops->scr_write(ap, SCR_ERROR, serror);
+
+	if (ata_stat & ATA_ERR) {
+		ata_ehi_clear_desc(ehi);
+		ata_ehi_push_desc(ehi, "Ata error. fis:0x%X", fis);
+		ehi->err_mask |= AC_ERR_DEV;
+		ehi->serror |= serror;
+		ehi->action |= ATA_EH_SOFTRESET;
+		ata_port_freeze(ap);
+		return;
+	}
+
+	if (fis & NV_SWNCQ_IRQ_BACKOUT) {
+		/* If the IRQ is backout, driver must issue
+		 * the new command again some time later.
+		 */
+		pp->ncq_flags |= ncq_saw_backout;
+	}
+
+	if (fis & NV_SWNCQ_IRQ_SDBFIS) {
+		pp->ncq_flags |= ncq_saw_sdb;
+		DPRINTK("id 0x%x SWNCQ: qc_active 0x%X "
+			"dhfis 0x%X dmafis 0x%X sactive 0x%X\n",
+			ap->print_id, pp->qc_active, pp->dhfis_bits,
+			pp->dmafis_bits, readl(pp->sactive_block));
+		rc = nv_swncq_sdbfis(ap);
+		if (rc < 0)
+			goto irq_error;
+	}
+
+	if (fis & NV_SWNCQ_IRQ_DHREGFIS) {
+		/* The interrupt indicates the new command
+		 * was transmitted correctly to the drive.
+		 */
+		pp->dhfis_bits |= (0x1 << pp->last_issue_tag);
+		pp->ncq_flags |= ncq_saw_d2h;
+		if (pp->ncq_flags & (ncq_saw_sdb | ncq_saw_backout)) {
+			ata_ehi_push_desc(ehi, "illegal fis transaction");
+			ehi->err_mask |= AC_ERR_HSM;
+			ehi->action |= ATA_EH_HARDRESET;
+			goto irq_error;
+		}
+
+		if (!(fis & NV_SWNCQ_IRQ_DMASETUP) &&
+		    !(pp->ncq_flags & ncq_saw_dmas)) {
+			ata_stat = ap->ops->check_status(ap);
+			if (ata_stat & ATA_BUSY)
+				goto irq_exit;
+
+			if (pp->defer_queue.defer_bits) {
+				DPRINTK("send next command\n");
+				qc = nv_swncq_qc_from_dq(ap);
+				nv_swncq_issue_atacmd(ap, qc);
+			}
+		}
+	}
+
+	if (fis & NV_SWNCQ_IRQ_DMASETUP) {
+		/* program the dma controller with appropriate PRD buffers
+		 * and start the DMA transfer for requested command.
+		 */
+		pp->dmafis_bits |= (0x1 << nv_swncq_tag(ap));
+		pp->ncq_flags |= ncq_saw_dmas;
+		rc = nv_swncq_dmafis(ap);
+	}
+
+irq_exit:
+	return;
+irq_error:
+	ata_ehi_push_desc(ehi, "fis:0x%x", fis);
+	ata_port_freeze(ap);
+	return;
+}
+
+static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance)
+{
+	struct ata_host *host = dev_instance;
+	unsigned int i;
+	unsigned int handled = 0;
+	unsigned long flags;
+	u32 irq_stat;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	irq_stat = readl(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_MCP55);
+
+	for (i = 0; i < host->n_ports; i++) {
+		struct ata_port *ap = host->ports[i];
+
+		if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
+			if (ap->link.sactive) {
+				nv_swncq_host_interrupt(ap, (u16)irq_stat);
+				handled = 1;
+			} else {
+				if (irq_stat)	/* reserve Hotplug */
+					nv_swncq_irq_clear(ap, 0xfff0);
+
+				handled += nv_host_intr(ap, (u8)irq_stat);
+			}
+		}
+		irq_stat >>= NV_INT_PORT_SHIFT_MCP55;
+	}
+
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return IRQ_RETVAL(handled);
+}
+
 static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version = 0;
@@ -1551,7 +2378,7 @@
 		return rc;
 
 	/* determine type and allocate host */
-	if (type >= CK804 && adma_enabled) {
+	if (type == CK804 && adma_enabled) {
 		dev_printk(KERN_NOTICE, &pdev->dev, "Using ADMA mode\n");
 		type = ADMA;
 	}
@@ -1597,6 +2424,9 @@
 		rc = nv_adma_host_init(host);
 		if (rc)
 			return rc;
+	} else if (type == SWNCQ && swncq_enabled) {
+		dev_printk(KERN_NOTICE, &pdev->dev, "Using SWNCQ mode\n");
+		nv_swncq_host_init(host);
 	}
 
 	pci_set_master(pdev);
@@ -1696,3 +2526,6 @@
 module_exit(nv_exit);
 module_param_named(adma, adma_enabled, bool, 0444);
 MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: true)");
+module_param_named(swncq, swncq_enabled, bool, 0444);
+MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: false)");
+
diff --git a/drivers/char/ec3104_keyb.c b/drivers/char/ec3104_keyb.c
deleted file mode 100644
index 0200114..0000000
--- a/drivers/char/ec3104_keyb.c
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * linux/drivers/char/ec3104_keyb.c
- * 
- * Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>
- *
- * based on linux/drivers/char/pc_keyb.c, which had the following comments:
- *
- * Separation of the PC low-level part by Geert Uytterhoeven, May 1997
- * See keyboard.c for the whole history.
- *
- * Major cleanup by Martin Mares, May 1997
- *
- * Combined the keyboard and PS/2 mouse handling into one file,
- * because they share the same hardware.
- * Johan Myreen <jem@iki.fi> 1998-10-08.
- *
- * Code fixes to handle mouse ACKs properly.
- * C. Scott Ananian <cananian@alumni.princeton.edu> 1999-01-29.
- */
-/* EC3104 note:
- * This code was written without any documentation about the EC3104 chip.  While
- * I hope I got most of the basic functionality right, the register names I use
- * are most likely completely different from those in the chip documentation.
- *
- * If you have any further information about the EC3104, please tell me
- * (prumpf@tux.org).
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/tty.h>
-#include <linux/mm.h>
-#include <linux/signal.h>
-#include <linux/init.h>
-#include <linux/kbd_ll.h>
-#include <linux/delay.h>
-#include <linux/random.h>
-#include <linux/poll.h>
-#include <linux/miscdevice.h>
-#include <linux/slab.h>
-#include <linux/kbd_kern.h>
-#include <linux/bitops.h>
-
-#include <asm/keyboard.h>
-#include <asm/uaccess.h>
-#include <asm/irq.h>
-#include <asm/system.h>
-#include <asm/ec3104.h>
-
-#include <asm/io.h>
-
-/* Some configuration switches are present in the include file... */
-
-#include <linux/pc_keyb.h>
-
-#define MSR_CTS 0x10
-#define MCR_RTS 0x02
-#define LSR_DR 0x01
-#define LSR_BOTH_EMPTY 0x60
-
-static struct e5_struct {
-	u8 packet[8];
-	int pos;
-	int length;
-
-	u8 cached_mcr;
-	u8 last_msr;
-} ec3104_keyb;
-	
-/* Simple translation table for the SysRq keys */
-
-
-#ifdef CONFIG_MAGIC_SYSRQ
-unsigned char ec3104_kbd_sysrq_xlate[128] =
-	"\000\0331234567890-=\177\t"			/* 0x00 - 0x0f */
-	"qwertyuiop[]\r\000as"				/* 0x10 - 0x1f */
-	"dfghjkl;'`\000\\zxcv"				/* 0x20 - 0x2f */
-	"bnm,./\000*\000 \000\201\202\203\204\205"	/* 0x30 - 0x3f */
-	"\206\207\210\211\212\000\000789-456+1"		/* 0x40 - 0x4f */
-	"230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
-	"\r\000/";					/* 0x60 - 0x6f */
-#endif
-
-static void kbd_write_command_w(int data);
-static void kbd_write_output_w(int data);
-#ifdef CONFIG_PSMOUSE
-static void aux_write_ack(int val);
-static void __aux_write_ack(int val);
-#endif
-
-static DEFINE_SPINLOCK(kbd_controller_lock);
-static unsigned char handle_kbd_event(void);
-
-/* used only by send_data - set by keyboard_interrupt */
-static volatile unsigned char reply_expected;
-static volatile unsigned char acknowledge;
-static volatile unsigned char resend;
-
-
-int ec3104_kbd_setkeycode(unsigned int scancode, unsigned int keycode)
-{
-	return 0;
-}
-
-int ec3104_kbd_getkeycode(unsigned int scancode)
-{
-	return 0;
-}
-
-
-/* yes, it probably would be faster to use an array.  I don't care. */
-
-static inline unsigned char ec3104_scan2key(unsigned char scancode)
-{
-	switch (scancode) {
-	case  1: /* '`' */
-		return 41;
-		
-	case  2 ... 27:
-		return scancode;
-		
-	case 28: /* '\\' */
-		return 43;
-
-	case 29 ... 39:
-		return scancode + 1;
-
-	case 40: /* '\r' */
-		return 28;
-
-	case 41 ... 50:
-		return scancode + 3;
-
-	case 51: /* ' ' */
-		return 57;
-		
-	case 52: /* escape */
-		return 1;
-
-	case 54: /* insert/delete (labelled delete) */
-		/* this should arguably be 110, but I'd like to have ctrl-alt-del
-		 * working with a standard keymap */
-		return 111;
-
-	case 55: /* left */
-		return 105;
-	case 56: /* home */
-		return 102;
-	case 57: /* end */
-		return 107;
-	case 58: /* up */
-		return 103;
-	case 59: /* down */
-		return 108;
-	case 60: /* pgup */
-		return 104;
-	case 61: /* pgdown */
-		return 109;
-	case 62: /* right */
-		return 106;
-
-	case 79 ... 88: /* f1 - f10 */
-		return scancode - 20;
-
-	case 89 ... 90: /* f11 - f12 */
-		return scancode - 2;
-
-	case 91: /* left shift */
-		return 42;
-
-	case 92: /* right shift */
-		return 54;
-
-	case 93: /* left alt */
-		return 56;
-	case 94: /* right alt */
-		return 100;
-	case 95: /* left ctrl */
-		return 29;
-	case 96: /* right ctrl */
-		return 97;
-
-	case 97: /* caps lock */
-		return 58;
-	case 102: /* left windows */
-		return 125;
-	case 103: /* right windows */
-		return 126;
-
-	case 106: /* Fn */
-		/* this is wrong. */
-		return 84;
-
-	default:
-		return 0;
-	}
-}
-		
-int ec3104_kbd_translate(unsigned char scancode, unsigned char *keycode,
-		    char raw_mode)
-{
-	scancode &= 0x7f;
-
-	*keycode = ec3104_scan2key(scancode);
-
- 	return 1;
-}
-
-char ec3104_kbd_unexpected_up(unsigned char keycode)
-{
-	return 0200;
-}
-
-static inline void handle_keyboard_event(unsigned char scancode)
-{
-#ifdef CONFIG_VT
-	handle_scancode(scancode, !(scancode & 0x80));
-#endif				
-	tasklet_schedule(&keyboard_tasklet);
-}	
-
-void ec3104_kbd_leds(unsigned char leds)
-{
-}
-
-static u8 e5_checksum(u8 *packet, int count)
-{
-	int i;
-	u8 sum = 0;
-
-	for (i=0; i<count; i++)
-		sum ^= packet[i];
-		
-	if (sum & 0x80)
-		sum ^= 0xc0;
-
-	return sum;
-}
-
-static void e5_wait_for_cts(struct e5_struct *k)
-{
-	u8 msr;
-		
-	do {
-		msr = ctrl_inb(EC3104_SER4_MSR);
-	} while (!(msr & MSR_CTS));
-}
-
-
-static void e5_send_byte(u8 byte, struct e5_struct *k)
-{
-	u8 status;
-		
-	do {
-		status = ctrl_inb(EC3104_SER4_LSR);
-	} while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY);
-	
-	printk("<%02x>", byte);
-
-	ctrl_outb(byte, EC3104_SER4_DATA);
-
-	do {
-		status = ctrl_inb(EC3104_SER4_LSR);
-	} while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY);
-	
-}
-
-static int e5_send_packet(u8 *packet, int count, struct e5_struct *k)
-{
-	int i;
-
-	disable_irq(EC3104_IRQ_SER4);
-	
-	if (k->cached_mcr & MCR_RTS) {
-		printk("e5_send_packet: too slow\n");
-		enable_irq(EC3104_IRQ_SER4);
-		return -EAGAIN;
-	}
-
-	k->cached_mcr |= MCR_RTS;
-	ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-
-	e5_wait_for_cts(k);
-
-	printk("p: ");
-
-	for(i=0; i<count; i++)
-		e5_send_byte(packet[i], k);
-
-	e5_send_byte(e5_checksum(packet, count), k);
-
-	printk("\n");
-
-	udelay(1500);
-
-	k->cached_mcr &= ~MCR_RTS;
-	ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	
-	
-
-	enable_irq(EC3104_IRQ_SER4);
-
-	
-
-	return 0;
-}
-
-/*
- * E5 packets we know about:
- * E5->host 0x80 0x05 <checksum> - resend packet
- * host->E5 0x83 0x43 <contrast> - set LCD contrast
- * host->E5 0x85 0x41 0x02 <brightness> 0x02 - set LCD backlight
- * E5->host 0x87 <ps2 packet> 0x00 <checksum> - external PS2 
- * E5->host 0x88 <scancode> <checksum> - key press
- */
-
-static void e5_receive(struct e5_struct *k)
-{
-	k->packet[k->pos++] = ctrl_inb(EC3104_SER4_DATA);
-
-	if (k->pos == 1) {
-		switch(k->packet[0]) {
-		case 0x80:
-			k->length = 3;
-			break;
-			
-		case 0x87: /* PS2 ext */
-			k->length = 6;
-			break;
-
-		case 0x88: /* keyboard */
-			k->length = 3;
-			break;
-
-		default:
-			k->length = 1;
-			printk(KERN_WARNING "unknown E5 packet %02x\n",
-			       k->packet[0]);
-		}
-	}
-
-	if (k->pos == k->length) {
-		int i;
-
-		if (e5_checksum(k->packet, k->length) != 0)
-			printk(KERN_WARNING "E5: wrong checksum\n");
-
-#if 0
-		printk("E5 packet [");
-		for(i=0; i<k->length; i++) {
-			printk("%02x ", k->packet[i]);
-		}
-
-		printk("(%02x)]\n", e5_checksum(k->packet, k->length-1));
-#endif
-
-		switch(k->packet[0]) {
-		case 0x80:
-		case 0x88:
-			handle_keyboard_event(k->packet[1]);
-			break;
-		}
-
-		k->pos = k->length = 0;
-	}
-}
-
-static void ec3104_keyb_interrupt(int irq, void *data)
-{
-	struct e5_struct *k = &ec3104_keyb;
-	u8 msr, lsr;
-
-	msr = ctrl_inb(EC3104_SER4_MSR);
-	
-	if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) {
-		if (k->cached_mcr & MCR_RTS)
-			printk("confused: RTS already high\n");
-		/* CTS went high.  Send RTS. */
-		k->cached_mcr |= MCR_RTS;
-		
-		ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-	} else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) {
-		/* CTS went low. */
-		if (!(k->cached_mcr & MCR_RTS))
-			printk("confused: RTS already low\n");
-
-		k->cached_mcr &= ~MCR_RTS;
-
-		ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-	}
-
-	k->last_msr = msr;
-
-	lsr = ctrl_inb(EC3104_SER4_LSR);
-
-	if (lsr & LSR_DR)
-		e5_receive(k);
-}
-
-static void ec3104_keyb_clear_state(void)
-{
-	struct e5_struct *k = &ec3104_keyb;
-	u8 msr, lsr;
-	
-	/* we want CTS to be low */
-	k->last_msr = 0;
-
-	for (;;) {
-		msleep(100);
-
-		msr = ctrl_inb(EC3104_SER4_MSR);
-	
-		lsr = ctrl_inb(EC3104_SER4_LSR);
-		
-		if (lsr & LSR_DR) {
-			e5_receive(k);
-			continue;
-		}
-
-		if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) {
-			if (k->cached_mcr & MCR_RTS)
-				printk("confused: RTS already high\n");
-			/* CTS went high.  Send RTS. */
-			k->cached_mcr |= MCR_RTS;
-		
-			ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-		} else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) {
-			/* CTS went low. */
-			if (!(k->cached_mcr & MCR_RTS))
-				printk("confused: RTS already low\n");
-			
-			k->cached_mcr &= ~MCR_RTS;
-			
-			ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-		} else
-			break;
-
-		k->last_msr = msr;
-
-		continue;
-	}
-}
-
-void __init ec3104_kbd_init_hw(void)
-{
-	ec3104_keyb.last_msr = ctrl_inb(EC3104_SER4_MSR);
-	ec3104_keyb.cached_mcr = ctrl_inb(EC3104_SER4_MCR);
-
-	ec3104_keyb_clear_state();
-
-	/* Ok, finally allocate the IRQ, and off we go.. */
-	request_irq(EC3104_IRQ_SER4, ec3104_keyb_interrupt, 0, "keyboard", NULL);
-}
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index e47f881..700a165 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -158,6 +158,7 @@
 config SENSORS_AMS
 	tristate "Apple Motion Sensor driver"
 	depends on PPC_PMAC && !PPC64 && INPUT && ((ADB_PMU && I2C = y) || (ADB_PMU && !I2C) || I2C) && EXPERIMENTAL
+	select INPUT_POLLDEV
 	help
 	  Support for the motion sensor included in PowerBooks. Includes
 	  implementations for PMU and I2C.
@@ -701,6 +702,7 @@
 config SENSORS_HDAPS
 	tristate "IBM Hard Drive Active Protection System (hdaps)"
 	depends on INPUT && X86
+	select INPUT_POLLDEV
 	default n
 	help
 	  This driver provides support for the IBM Hard Drive Active Protection
@@ -722,6 +724,7 @@
 	depends on INPUT && X86
 	select NEW_LEDS
 	select LEDS_CLASS
+	select INPUT_POLLDEV
 	default n
 	help
 	  This driver provides support for the Apple System Management
diff --git a/drivers/hwmon/ams/ams-input.c b/drivers/hwmon/ams/ams-input.c
index ca7095d..7b81e0c2 100644
--- a/drivers/hwmon/ams/ams-input.c
+++ b/drivers/hwmon/ams/ams-input.c
@@ -27,47 +27,32 @@
 module_param(invert, bool, 0644);
 MODULE_PARM_DESC(invert, "Invert input data on X and Y axis");
 
-static int ams_input_kthread(void *data)
+static void ams_idev_poll(struct input_polled_dev *dev)
 {
+	struct input_dev *idev = dev->input;
 	s8 x, y, z;
 
-	while (!kthread_should_stop()) {
-		mutex_lock(&ams_info.lock);
+	mutex_lock(&ams_info.lock);
 
-		ams_sensors(&x, &y, &z);
+	ams_sensors(&x, &y, &z);
 
-		x -= ams_info.xcalib;
-		y -= ams_info.ycalib;
-		z -= ams_info.zcalib;
+	x -= ams_info.xcalib;
+	y -= ams_info.ycalib;
+	z -= ams_info.zcalib;
 
-		input_report_abs(ams_info.idev, ABS_X, invert ? -x : x);
-		input_report_abs(ams_info.idev, ABS_Y, invert ? -y : y);
-		input_report_abs(ams_info.idev, ABS_Z, z);
+	input_report_abs(idev, ABS_X, invert ? -x : x);
+	input_report_abs(idev, ABS_Y, invert ? -y : y);
+	input_report_abs(idev, ABS_Z, z);
 
-		input_sync(ams_info.idev);
+	input_sync(idev);
 
-		mutex_unlock(&ams_info.lock);
-
-		msleep(25);
-	}
-
-	return 0;
-}
-
-static int ams_input_open(struct input_dev *dev)
-{
-	ams_info.kthread = kthread_run(ams_input_kthread, NULL, "kams");
-	return IS_ERR(ams_info.kthread) ? PTR_ERR(ams_info.kthread) : 0;
-}
-
-static void ams_input_close(struct input_dev *dev)
-{
-	kthread_stop(ams_info.kthread);
+	mutex_unlock(&ams_info.lock);
 }
 
 /* Call with ams_info.lock held! */
 static void ams_input_enable(void)
 {
+	struct input_dev *input;
 	s8 x, y, z;
 
 	if (ams_info.idev)
@@ -78,27 +63,29 @@
 	ams_info.ycalib = y;
 	ams_info.zcalib = z;
 
-	ams_info.idev = input_allocate_device();
+	ams_info.idev = input_allocate_polled_device();
 	if (!ams_info.idev)
 		return;
 
-	ams_info.idev->name = "Apple Motion Sensor";
-	ams_info.idev->id.bustype = ams_info.bustype;
-	ams_info.idev->id.vendor = 0;
-	ams_info.idev->open = ams_input_open;
-	ams_info.idev->close = ams_input_close;
-	ams_info.idev->dev.parent = &ams_info.of_dev->dev;
+	ams_info.idev->poll = ams_idev_poll;
+	ams_info.idev->poll_interval = 25;
 
-	input_set_abs_params(ams_info.idev, ABS_X, -50, 50, 3, 0);
-	input_set_abs_params(ams_info.idev, ABS_Y, -50, 50, 3, 0);
-	input_set_abs_params(ams_info.idev, ABS_Z, -50, 50, 3, 0);
+	input = ams_info.idev->input;
+	input->name = "Apple Motion Sensor";
+	input->id.bustype = ams_info.bustype;
+	input->id.vendor = 0;
+	input->dev.parent = &ams_info.of_dev->dev;
 
-	set_bit(EV_ABS, ams_info.idev->evbit);
-	set_bit(EV_KEY, ams_info.idev->evbit);
-	set_bit(BTN_TOUCH, ams_info.idev->keybit);
+	input_set_abs_params(input, ABS_X, -50, 50, 3, 0);
+	input_set_abs_params(input, ABS_Y, -50, 50, 3, 0);
+	input_set_abs_params(input, ABS_Z, -50, 50, 3, 0);
 
-	if (input_register_device(ams_info.idev)) {
-		input_free_device(ams_info.idev);
+	set_bit(EV_ABS, input->evbit);
+	set_bit(EV_KEY, input->evbit);
+	set_bit(BTN_TOUCH, input->keybit);
+
+	if (input_register_polled_device(ams_info.idev)) {
+		input_free_polled_device(ams_info.idev);
 		ams_info.idev = NULL;
 		return;
 	}
@@ -108,7 +95,8 @@
 static void ams_input_disable(void)
 {
 	if (ams_info.idev) {
-		input_unregister_device(ams_info.idev);
+		input_unregister_polled_device(ams_info.idev);
+		input_free_polled_device(ams_info.idev);
 		ams_info.idev = NULL;
 	}
 }
diff --git a/drivers/hwmon/ams/ams.h b/drivers/hwmon/ams/ams.h
index 240730e..a6221e5 100644
--- a/drivers/hwmon/ams/ams.h
+++ b/drivers/hwmon/ams/ams.h
@@ -1,5 +1,5 @@
 #include <linux/i2c.h>
-#include <linux/input.h>
+#include <linux/input-polldev.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
@@ -52,8 +52,7 @@
 #endif
 
 	/* Joystick emulation */
-	struct task_struct *kthread;
-	struct input_dev *idev;
+	struct input_polled_dev *idev;
 	__u16 bustype;
 
 	/* calibrated null values */
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index f37fd7e..4879125 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -28,7 +28,7 @@
 
 #include <linux/delay.h>
 #include <linux/platform_device.h>
-#include <linux/input.h>
+#include <linux/input-polldev.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/timer.h>
@@ -59,9 +59,9 @@
 
 #define LIGHT_SENSOR_LEFT_KEY	"ALV0" /* r-o {alv (6 bytes) */
 #define LIGHT_SENSOR_RIGHT_KEY	"ALV1" /* r-o {alv (6 bytes) */
-#define BACKLIGHT_KEY 		"LKSB" /* w-o {lkb (2 bytes) */
+#define BACKLIGHT_KEY		"LKSB" /* w-o {lkb (2 bytes) */
 
-#define CLAMSHELL_KEY 		"MSLD" /* r-o ui8 (unused) */
+#define CLAMSHELL_KEY		"MSLD" /* r-o ui8 (unused) */
 
 #define MOTION_SENSOR_X_KEY	"MO_X" /* r-o sp78 (2 bytes) */
 #define MOTION_SENSOR_Y_KEY	"MO_Y" /* r-o sp78 (2 bytes) */
@@ -103,7 +103,7 @@
 #define INIT_TIMEOUT_MSECS	5000	/* wait up to 5s for device init ... */
 #define INIT_WAIT_MSECS		50	/* ... in 50ms increments */
 
-#define APPLESMC_POLL_PERIOD	(HZ/20)	/* poll for input every 1/20s */
+#define APPLESMC_POLL_INTERVAL	50	/* msecs */
 #define APPLESMC_INPUT_FUZZ	4	/* input event threshold */
 #define APPLESMC_INPUT_FLAT	4
 
@@ -125,9 +125,8 @@
 static struct platform_device *pdev;
 static s16 rest_x;
 static s16 rest_y;
-static struct timer_list applesmc_timer;
-static struct input_dev *applesmc_idev;
 static struct device *hwmon_dev;
+static struct input_polled_dev *applesmc_idev;
 
 /* Indicates whether this computer has an accelerometer. */
 static unsigned int applesmc_accelerometer;
@@ -138,7 +137,7 @@
 /* Indicates which temperature sensors set to use. */
 static unsigned int applesmc_temperature_set;
 
-static struct mutex applesmc_lock;
+static DEFINE_MUTEX(applesmc_lock);
 
 /*
  * Last index written to key_at_index sysfs file, and value to use for all other
@@ -455,27 +454,12 @@
 	rest_x = -rest_x;
 }
 
-static int applesmc_idev_open(struct input_dev *dev)
+static void applesmc_idev_poll(struct input_polled_dev *dev)
 {
-	add_timer(&applesmc_timer);
-
-	return 0;
-}
-
-static void applesmc_idev_close(struct input_dev *dev)
-{
-	del_timer_sync(&applesmc_timer);
-}
-
-static void applesmc_idev_poll(unsigned long unused)
-{
+	struct input_dev *idev = dev->input;
 	s16 x, y;
 
-	/* Cannot sleep.  Try nonblockingly.  If we fail, try again later. */
-	if (!mutex_trylock(&applesmc_lock)) {
-		mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD);
-		return;
-	}
+	mutex_lock(&applesmc_lock);
 
 	if (applesmc_read_motion_sensor(SENSOR_X, &x))
 		goto out;
@@ -483,13 +467,11 @@
 		goto out;
 
 	x = -x;
-	input_report_abs(applesmc_idev, ABS_X, x - rest_x);
-	input_report_abs(applesmc_idev, ABS_Y, y - rest_y);
-	input_sync(applesmc_idev);
+	input_report_abs(idev, ABS_X, x - rest_x);
+	input_report_abs(idev, ABS_Y, y - rest_y);
+	input_sync(idev);
 
 out:
-	mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD);
-
 	mutex_unlock(&applesmc_lock);
 }
 
@@ -821,8 +803,7 @@
 
 	if (!ret) {
 		return info[0];
-	}
-	else {
+	} else {
 		return ret;
 	}
 }
@@ -1093,6 +1074,7 @@
 /* Create accelerometer ressources */
 static int applesmc_create_accelerometer(void)
 {
+	struct input_dev *idev;
 	int ret;
 
 	ret = sysfs_create_group(&pdev->dev.kobj,
@@ -1100,40 +1082,37 @@
 	if (ret)
 		goto out;
 
-	applesmc_idev = input_allocate_device();
+	applesmc_idev = input_allocate_polled_device();
 	if (!applesmc_idev) {
 		ret = -ENOMEM;
 		goto out_sysfs;
 	}
 
+	applesmc_idev->poll = applesmc_idev_poll;
+	applesmc_idev->poll_interval = APPLESMC_POLL_INTERVAL;
+
 	/* initial calibrate for the input device */
 	applesmc_calibrate();
 
-	/* initialize the input class */
-	applesmc_idev->name = "applesmc";
-	applesmc_idev->id.bustype = BUS_HOST;
-	applesmc_idev->dev.parent = &pdev->dev;
-	applesmc_idev->evbit[0] = BIT(EV_ABS);
-	applesmc_idev->open = applesmc_idev_open;
-	applesmc_idev->close = applesmc_idev_close;
-	input_set_abs_params(applesmc_idev, ABS_X,
+	/* initialize the input device */
+	idev = applesmc_idev->input;
+	idev->name = "applesmc";
+	idev->id.bustype = BUS_HOST;
+	idev->dev.parent = &pdev->dev;
+	idev->evbit[0] = BIT(EV_ABS);
+	input_set_abs_params(idev, ABS_X,
 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
-	input_set_abs_params(applesmc_idev, ABS_Y,
+	input_set_abs_params(idev, ABS_Y,
 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
 
-	ret = input_register_device(applesmc_idev);
+	ret = input_register_polled_device(applesmc_idev);
 	if (ret)
 		goto out_idev;
 
-	/* start up our timer for the input device */
-	init_timer(&applesmc_timer);
-	applesmc_timer.function = applesmc_idev_poll;
-	applesmc_timer.expires = jiffies + APPLESMC_POLL_PERIOD;
-
 	return 0;
 
 out_idev:
-	input_free_device(applesmc_idev);
+	input_free_polled_device(applesmc_idev);
 
 out_sysfs:
 	sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group);
@@ -1146,8 +1125,8 @@
 /* Release all ressources used by the accelerometer */
 static void applesmc_release_accelerometer(void)
 {
-	del_timer_sync(&applesmc_timer);
-	input_unregister_device(applesmc_idev);
+	input_unregister_polled_device(applesmc_idev);
+	input_free_polled_device(applesmc_idev);
 	sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group);
 }
 
@@ -1184,8 +1163,6 @@
 	int count;
 	int i;
 
-	mutex_init(&applesmc_lock);
-
 	if (!dmi_check_system(applesmc_whitelist)) {
 		printk(KERN_WARNING "applesmc: supported laptop not found!\n");
 		ret = -ENODEV;
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index a7c6d40..8a7ae03 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -28,7 +28,7 @@
 
 #include <linux/delay.h>
 #include <linux/platform_device.h>
-#include <linux/input.h>
+#include <linux/input-polldev.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -61,13 +61,12 @@
 #define INIT_TIMEOUT_MSECS	4000	/* wait up to 4s for device init ... */
 #define INIT_WAIT_MSECS		200	/* ... in 200ms increments */
 
-#define HDAPS_POLL_PERIOD	(HZ/20)	/* poll for input every 1/20s */
+#define HDAPS_POLL_INTERVAL	50	/* poll for input every 1/20s (50 ms)*/
 #define HDAPS_INPUT_FUZZ	4	/* input event threshold */
 #define HDAPS_INPUT_FLAT	4
 
-static struct timer_list hdaps_timer;
 static struct platform_device *pdev;
-static struct input_dev *hdaps_idev;
+static struct input_polled_dev *hdaps_idev;
 static unsigned int hdaps_invert;
 static u8 km_activity;
 static int rest_x;
@@ -323,24 +322,19 @@
 	__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y);
 }
 
-static void hdaps_mousedev_poll(unsigned long unused)
+static void hdaps_mousedev_poll(struct input_polled_dev *dev)
 {
+	struct input_dev *input_dev = dev->input;
 	int x, y;
 
-	/* Cannot sleep.  Try nonblockingly.  If we fail, try again later. */
-	if (mutex_trylock(&hdaps_mtx)) {
-		mod_timer(&hdaps_timer,jiffies + HDAPS_POLL_PERIOD);
-		return;
-	}
+	mutex_lock(&hdaps_mtx);
 
 	if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y))
 		goto out;
 
-	input_report_abs(hdaps_idev, ABS_X, x - rest_x);
-	input_report_abs(hdaps_idev, ABS_Y, y - rest_y);
-	input_sync(hdaps_idev);
-
-	mod_timer(&hdaps_timer, jiffies + HDAPS_POLL_PERIOD);
+	input_report_abs(input_dev, ABS_X, x - rest_x);
+	input_report_abs(input_dev, ABS_Y, y - rest_y);
+	input_sync(input_dev);
 
 out:
 	mutex_unlock(&hdaps_mtx);
@@ -536,6 +530,7 @@
 
 static int __init hdaps_init(void)
 {
+	struct input_dev *idev;
 	int ret;
 
 	if (!dmi_check_system(hdaps_whitelist)) {
@@ -563,39 +558,37 @@
 	if (ret)
 		goto out_device;
 
-	hdaps_idev = input_allocate_device();
+	hdaps_idev = input_allocate_polled_device();
 	if (!hdaps_idev) {
 		ret = -ENOMEM;
 		goto out_group;
 	}
 
+	hdaps_idev->poll = hdaps_mousedev_poll;
+	hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL;
+
 	/* initial calibrate for the input device */
 	hdaps_calibrate();
 
 	/* initialize the input class */
-	hdaps_idev->name = "hdaps";
-	hdaps_idev->dev.parent = &pdev->dev;
-	hdaps_idev->evbit[0] = BIT(EV_ABS);
-	input_set_abs_params(hdaps_idev, ABS_X,
+	idev = hdaps_idev->input;
+	idev->name = "hdaps";
+	idev->dev.parent = &pdev->dev;
+	idev->evbit[0] = BIT(EV_ABS);
+	input_set_abs_params(idev, ABS_X,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
-	input_set_abs_params(hdaps_idev, ABS_Y,
+	input_set_abs_params(idev, ABS_Y,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
 
-	ret = input_register_device(hdaps_idev);
+	ret = input_register_polled_device(hdaps_idev);
 	if (ret)
 		goto out_idev;
 
-	/* start up our timer for the input device */
-	init_timer(&hdaps_timer);
-	hdaps_timer.function = hdaps_mousedev_poll;
-	hdaps_timer.expires = jiffies + HDAPS_POLL_PERIOD;
-	add_timer(&hdaps_timer);
-
 	printk(KERN_INFO "hdaps: driver successfully loaded.\n");
 	return 0;
 
 out_idev:
-	input_free_device(hdaps_idev);
+	input_free_polled_device(hdaps_idev);
 out_group:
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 out_device:
@@ -611,8 +604,8 @@
 
 static void __exit hdaps_exit(void)
 {
-	del_timer_sync(&hdaps_timer);
-	input_unregister_device(hdaps_idev);
+	input_unregister_polled_device(hdaps_idev);
+	input_free_polled_device(hdaps_idev);
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 	platform_device_unregister(pdev);
 	platform_driver_unregister(&hdaps_driver);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 6545fa7..1b3327a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -349,6 +349,7 @@
 	struct sk_buff_head queue;
 
 	struct neighbour   *neighbour;
+	struct net_device *dev;
 
 	struct list_head    list;
 };
@@ -365,7 +366,8 @@
 				     INFINIBAND_ALEN, sizeof(void *));
 }
 
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
+				      struct net_device *dev);
 void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
 
 extern struct workqueue_struct *ipoib_workqueue;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e072f3c..362610d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -517,7 +517,7 @@
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
 
-	neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+	neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
 	if (!neigh) {
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
@@ -692,9 +692,10 @@
 				goto out;
 			}
 		} else if (neigh->ah) {
-			if (unlikely(memcmp(&neigh->dgid.raw,
+			if (unlikely((memcmp(&neigh->dgid.raw,
 					    skb->dst->neighbour->ha + 4,
-					    sizeof(union ib_gid)))) {
+					    sizeof(union ib_gid))) ||
+					 (neigh->dev != dev))) {
 				spin_lock(&priv->lock);
 				/*
 				 * It's safe to call ipoib_put_ah() inside
@@ -817,6 +818,13 @@
 	unsigned long flags;
 	struct ipoib_ah *ah = NULL;
 
+	neigh = *to_ipoib_neigh(n);
+	if (neigh) {
+		priv = netdev_priv(neigh->dev);
+		ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n",
+			  n->dev->name);
+	} else
+		return;
 	ipoib_dbg(priv,
 		  "neigh_cleanup for %06x " IPOIB_GID_FMT "\n",
 		  IPOIB_QPN(n->ha),
@@ -824,13 +832,10 @@
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	neigh = *to_ipoib_neigh(n);
-	if (neigh) {
-		if (neigh->ah)
-			ah = neigh->ah;
-		list_del(&neigh->list);
-		ipoib_neigh_free(n->dev, neigh);
-	}
+	if (neigh->ah)
+		ah = neigh->ah;
+	list_del(&neigh->list);
+	ipoib_neigh_free(n->dev, neigh);
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
@@ -838,7 +843,8 @@
 		ipoib_put_ah(ah);
 }
 
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+				      struct net_device *dev)
 {
 	struct ipoib_neigh *neigh;
 
@@ -847,6 +853,7 @@
 		return NULL;
 
 	neigh->neighbour = neighbour;
+	neigh->dev = dev;
 	*to_ipoib_neigh(neighbour) = neigh;
 	skb_queue_head_init(&neigh->queue);
 	ipoib_cm_set(neigh, NULL);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 827820e..9bcfc7a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -705,7 +705,8 @@
 		if (skb->dst            &&
 		    skb->dst->neighbour &&
 		    !*to_ipoib_neigh(skb->dst->neighbour)) {
-			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour,
+									skb->dev);
 
 			if (neigh) {
 				kref_get(&mcast->ah->ref);
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 2d87357..63512d9 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -114,28 +114,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called joydev.
 
-config INPUT_TSDEV
-	tristate "Touchscreen interface"
-	---help---
-	  Say Y here if you have an application that only can understand the
-	  Compaq touchscreen protocol for absolute pointer data. This is
-	  useful namely for embedded configurations.
-
-	  If unsure, say N.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called tsdev.
-
-config INPUT_TSDEV_SCREEN_X
-	int "Horizontal screen resolution"
-	depends on INPUT_TSDEV
-	default "240"
-
-config INPUT_TSDEV_SCREEN_Y
-	int "Vertical screen resolution"
-	depends on INPUT_TSDEV
-	default "320"
-
 config INPUT_EVDEV
 	tristate "Event interface"
 	help
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 15eb752..99af903 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -13,7 +13,6 @@
 obj-$(CONFIG_INPUT_MOUSEDEV)	+= mousedev.o
 obj-$(CONFIG_INPUT_JOYDEV)	+= joydev.o
 obj-$(CONFIG_INPUT_EVDEV)	+= evdev.o
-obj-$(CONFIG_INPUT_TSDEV)	+= tsdev.o
 obj-$(CONFIG_INPUT_EVBUG)	+= evbug.o
 
 obj-$(CONFIG_INPUT_KEYBOARD)	+= keyboard/
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index f1c3d6c..1d62c8b 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -30,6 +30,8 @@
 	wait_queue_head_t wait;
 	struct evdev_client *grab;
 	struct list_head client_list;
+	spinlock_t client_lock; /* protects client_list */
+	struct mutex mutex;
 	struct device dev;
 };
 
@@ -37,39 +39,54 @@
 	struct input_event buffer[EVDEV_BUFFER_SIZE];
 	int head;
 	int tail;
+	spinlock_t buffer_lock; /* protects access to buffer, head and tail */
 	struct fasync_struct *fasync;
 	struct evdev *evdev;
 	struct list_head node;
 };
 
 static struct evdev *evdev_table[EVDEV_MINORS];
+static DEFINE_MUTEX(evdev_table_mutex);
 
-static void evdev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+static void evdev_pass_event(struct evdev_client *client,
+			     struct input_event *event)
+{
+	/*
+	 * Interrupts are disabled, just acquire the lock
+	 */
+	spin_lock(&client->buffer_lock);
+	client->buffer[client->head++] = *event;
+	client->head &= EVDEV_BUFFER_SIZE - 1;
+	spin_unlock(&client->buffer_lock);
+
+	kill_fasync(&client->fasync, SIGIO, POLL_IN);
+}
+
+/*
+ * Pass incoming event to all connected clients.
+ */
+static void evdev_event(struct input_handle *handle,
+			unsigned int type, unsigned int code, int value)
 {
 	struct evdev *evdev = handle->private;
 	struct evdev_client *client;
+	struct input_event event;
 
-	if (evdev->grab) {
-		client = evdev->grab;
+	do_gettimeofday(&event.time);
+	event.type = type;
+	event.code = code;
+	event.value = value;
 
-		do_gettimeofday(&client->buffer[client->head].time);
-		client->buffer[client->head].type = type;
-		client->buffer[client->head].code = code;
-		client->buffer[client->head].value = value;
-		client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1);
+	rcu_read_lock();
 
-		kill_fasync(&client->fasync, SIGIO, POLL_IN);
-	} else
-		list_for_each_entry(client, &evdev->client_list, node) {
+	client = rcu_dereference(evdev->grab);
+	if (client)
+		evdev_pass_event(client, &event);
+	else
+		list_for_each_entry_rcu(client, &evdev->client_list, node)
+			evdev_pass_event(client, &event);
 
-			do_gettimeofday(&client->buffer[client->head].time);
-			client->buffer[client->head].type = type;
-			client->buffer[client->head].code = code;
-			client->buffer[client->head].value = value;
-			client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1);
-
-			kill_fasync(&client->fasync, SIGIO, POLL_IN);
-		}
+	rcu_read_unlock();
 
 	wake_up_interruptible(&evdev->wait);
 }
@@ -88,38 +105,140 @@
 {
 	struct evdev_client *client = file->private_data;
 	struct evdev *evdev = client->evdev;
+	int retval;
+
+	retval = mutex_lock_interruptible(&evdev->mutex);
+	if (retval)
+		return retval;
 
 	if (!evdev->exist)
-		return -ENODEV;
+		retval = -ENODEV;
+	else
+		retval = input_flush_device(&evdev->handle, file);
 
-	return input_flush_device(&evdev->handle, file);
+	mutex_unlock(&evdev->mutex);
+	return retval;
 }
 
 static void evdev_free(struct device *dev)
 {
 	struct evdev *evdev = container_of(dev, struct evdev, dev);
 
-	evdev_table[evdev->minor] = NULL;
 	kfree(evdev);
 }
 
+/*
+ * Grabs an event device (along with underlying input device).
+ * This function is called with evdev->mutex taken.
+ */
+static int evdev_grab(struct evdev *evdev, struct evdev_client *client)
+{
+	int error;
+
+	if (evdev->grab)
+		return -EBUSY;
+
+	error = input_grab_device(&evdev->handle);
+	if (error)
+		return error;
+
+	rcu_assign_pointer(evdev->grab, client);
+	synchronize_rcu();
+
+	return 0;
+}
+
+static int evdev_ungrab(struct evdev *evdev, struct evdev_client *client)
+{
+	if (evdev->grab != client)
+		return  -EINVAL;
+
+	rcu_assign_pointer(evdev->grab, NULL);
+	synchronize_rcu();
+	input_release_device(&evdev->handle);
+
+	return 0;
+}
+
+static void evdev_attach_client(struct evdev *evdev,
+				struct evdev_client *client)
+{
+	spin_lock(&evdev->client_lock);
+	list_add_tail_rcu(&client->node, &evdev->client_list);
+	spin_unlock(&evdev->client_lock);
+	synchronize_rcu();
+}
+
+static void evdev_detach_client(struct evdev *evdev,
+				struct evdev_client *client)
+{
+	spin_lock(&evdev->client_lock);
+	list_del_rcu(&client->node);
+	spin_unlock(&evdev->client_lock);
+	synchronize_rcu();
+}
+
+static int evdev_open_device(struct evdev *evdev)
+{
+	int retval;
+
+	retval = mutex_lock_interruptible(&evdev->mutex);
+	if (retval)
+		return retval;
+
+	if (!evdev->exist)
+		retval = -ENODEV;
+	else if (!evdev->open++) {
+		retval = input_open_device(&evdev->handle);
+		if (retval)
+			evdev->open--;
+	}
+
+	mutex_unlock(&evdev->mutex);
+	return retval;
+}
+
+static void evdev_close_device(struct evdev *evdev)
+{
+	mutex_lock(&evdev->mutex);
+
+	if (evdev->exist && !--evdev->open)
+		input_close_device(&evdev->handle);
+
+	mutex_unlock(&evdev->mutex);
+}
+
+/*
+ * Wake up users waiting for IO so they can disconnect from
+ * dead device.
+ */
+static void evdev_hangup(struct evdev *evdev)
+{
+	struct evdev_client *client;
+
+	spin_lock(&evdev->client_lock);
+	list_for_each_entry(client, &evdev->client_list, node)
+		kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+	spin_unlock(&evdev->client_lock);
+
+	wake_up_interruptible(&evdev->wait);
+}
+
 static int evdev_release(struct inode *inode, struct file *file)
 {
 	struct evdev_client *client = file->private_data;
 	struct evdev *evdev = client->evdev;
 
-	if (evdev->grab == client) {
-		input_release_device(&evdev->handle);
-		evdev->grab = NULL;
-	}
+	mutex_lock(&evdev->mutex);
+	if (evdev->grab == client)
+		evdev_ungrab(evdev, client);
+	mutex_unlock(&evdev->mutex);
 
 	evdev_fasync(-1, file, 0);
-	list_del(&client->node);
+	evdev_detach_client(evdev, client);
 	kfree(client);
 
-	if (!--evdev->open && evdev->exist)
-		input_close_device(&evdev->handle);
-
+	evdev_close_device(evdev);
 	put_device(&evdev->dev);
 
 	return 0;
@@ -127,41 +246,44 @@
 
 static int evdev_open(struct inode *inode, struct file *file)
 {
-	struct evdev_client *client;
 	struct evdev *evdev;
+	struct evdev_client *client;
 	int i = iminor(inode) - EVDEV_MINOR_BASE;
 	int error;
 
 	if (i >= EVDEV_MINORS)
 		return -ENODEV;
 
+	error = mutex_lock_interruptible(&evdev_table_mutex);
+	if (error)
+		return error;
 	evdev = evdev_table[i];
+	if (evdev)
+		get_device(&evdev->dev);
+	mutex_unlock(&evdev_table_mutex);
 
-	if (!evdev || !evdev->exist)
+	if (!evdev)
 		return -ENODEV;
 
-	get_device(&evdev->dev);
-
 	client = kzalloc(sizeof(struct evdev_client), GFP_KERNEL);
 	if (!client) {
 		error = -ENOMEM;
 		goto err_put_evdev;
 	}
 
+	spin_lock_init(&client->buffer_lock);
 	client->evdev = evdev;
-	list_add_tail(&client->node, &evdev->client_list);
+	evdev_attach_client(evdev, client);
 
-	if (!evdev->open++ && evdev->exist) {
-		error = input_open_device(&evdev->handle);
-		if (error)
-			goto err_free_client;
-	}
+	error = evdev_open_device(evdev);
+	if (error)
+		goto err_free_client;
 
 	file->private_data = client;
 	return 0;
 
  err_free_client:
-	list_del(&client->node);
+	evdev_detach_client(evdev, client);
 	kfree(client);
  err_put_evdev:
 	put_device(&evdev->dev);
@@ -197,12 +319,14 @@
 		sizeof(struct input_event_compat) : sizeof(struct input_event);
 }
 
-static int evdev_event_from_user(const char __user *buffer, struct input_event *event)
+static int evdev_event_from_user(const char __user *buffer,
+				 struct input_event *event)
 {
 	if (COMPAT_TEST) {
 		struct input_event_compat compat_event;
 
-		if (copy_from_user(&compat_event, buffer, sizeof(struct input_event_compat)))
+		if (copy_from_user(&compat_event, buffer,
+				   sizeof(struct input_event_compat)))
 			return -EFAULT;
 
 		event->time.tv_sec = compat_event.time.tv_sec;
@@ -219,7 +343,8 @@
 	return 0;
 }
 
-static int evdev_event_to_user(char __user *buffer, const struct input_event *event)
+static int evdev_event_to_user(char __user *buffer,
+				const struct input_event *event)
 {
 	if (COMPAT_TEST) {
 		struct input_event_compat compat_event;
@@ -230,7 +355,8 @@
 		compat_event.code = event->code;
 		compat_event.value = event->value;
 
-		if (copy_to_user(buffer, &compat_event, sizeof(struct input_event_compat)))
+		if (copy_to_user(buffer, &compat_event,
+				 sizeof(struct input_event_compat)))
 			return -EFAULT;
 
 	} else {
@@ -248,7 +374,8 @@
 	return sizeof(struct input_event);
 }
 
-static int evdev_event_from_user(const char __user *buffer, struct input_event *event)
+static int evdev_event_from_user(const char __user *buffer,
+				 struct input_event *event)
 {
 	if (copy_from_user(event, buffer, sizeof(struct input_event)))
 		return -EFAULT;
@@ -256,7 +383,8 @@
 	return 0;
 }
 
-static int evdev_event_to_user(char __user *buffer, const struct input_event *event)
+static int evdev_event_to_user(char __user *buffer,
+				const struct input_event *event)
 {
 	if (copy_to_user(buffer, event, sizeof(struct input_event)))
 		return -EFAULT;
@@ -266,37 +394,71 @@
 
 #endif /* CONFIG_COMPAT */
 
-static ssize_t evdev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+static ssize_t evdev_write(struct file *file, const char __user *buffer,
+			   size_t count, loff_t *ppos)
 {
 	struct evdev_client *client = file->private_data;
 	struct evdev *evdev = client->evdev;
 	struct input_event event;
-	int retval = 0;
+	int retval;
 
-	if (!evdev->exist)
-		return -ENODEV;
+	retval = mutex_lock_interruptible(&evdev->mutex);
+	if (retval)
+		return retval;
+
+	if (!evdev->exist) {
+		retval = -ENODEV;
+		goto out;
+	}
 
 	while (retval < count) {
 
-		if (evdev_event_from_user(buffer + retval, &event))
-			return -EFAULT;
-		input_inject_event(&evdev->handle, event.type, event.code, event.value);
+		if (evdev_event_from_user(buffer + retval, &event)) {
+			retval = -EFAULT;
+			goto out;
+		}
+
+		input_inject_event(&evdev->handle,
+				   event.type, event.code, event.value);
 		retval += evdev_event_size();
 	}
 
+ out:
+	mutex_unlock(&evdev->mutex);
 	return retval;
 }
 
-static ssize_t evdev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+static int evdev_fetch_next_event(struct evdev_client *client,
+				  struct input_event *event)
+{
+	int have_event;
+
+	spin_lock_irq(&client->buffer_lock);
+
+	have_event = client->head != client->tail;
+	if (have_event) {
+		*event = client->buffer[client->tail++];
+		client->tail &= EVDEV_BUFFER_SIZE - 1;
+	}
+
+	spin_unlock_irq(&client->buffer_lock);
+
+	return have_event;
+}
+
+static ssize_t evdev_read(struct file *file, char __user *buffer,
+			  size_t count, loff_t *ppos)
 {
 	struct evdev_client *client = file->private_data;
 	struct evdev *evdev = client->evdev;
+	struct input_event event;
 	int retval;
 
 	if (count < evdev_event_size())
 		return -EINVAL;
 
-	if (client->head == client->tail && evdev->exist && (file->f_flags & O_NONBLOCK))
+	if (client->head == client->tail && evdev->exist &&
+	    (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
 	retval = wait_event_interruptible(evdev->wait,
@@ -307,14 +469,12 @@
 	if (!evdev->exist)
 		return -ENODEV;
 
-	while (client->head != client->tail && retval + evdev_event_size() <= count) {
+	while (retval + evdev_event_size() <= count &&
+	       evdev_fetch_next_event(client, &event)) {
 
-		struct input_event *event = (struct input_event *) client->buffer + client->tail;
-
-		if (evdev_event_to_user(buffer + retval, event))
+		if (evdev_event_to_user(buffer + retval, &event))
 			return -EFAULT;
 
-		client->tail = (client->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
 		retval += evdev_event_size();
 	}
 
@@ -409,8 +569,8 @@
 	return copy_to_user(p, str, len) ? -EFAULT : len;
 }
 
-static long evdev_ioctl_handler(struct file *file, unsigned int cmd,
-				void __user *p, int compat_mode)
+static long evdev_do_ioctl(struct file *file, unsigned int cmd,
+			   void __user *p, int compat_mode)
 {
 	struct evdev_client *client = file->private_data;
 	struct evdev *evdev = client->evdev;
@@ -421,215 +581,289 @@
 	int i, t, u, v;
 	int error;
 
-	if (!evdev->exist)
-		return -ENODEV;
-
 	switch (cmd) {
 
-		case EVIOCGVERSION:
-			return put_user(EV_VERSION, ip);
+	case EVIOCGVERSION:
+		return put_user(EV_VERSION, ip);
 
-		case EVIOCGID:
-			if (copy_to_user(p, &dev->id, sizeof(struct input_id)))
-				return -EFAULT;
-			return 0;
+	case EVIOCGID:
+		if (copy_to_user(p, &dev->id, sizeof(struct input_id)))
+			return -EFAULT;
+		return 0;
 
-		case EVIOCGREP:
-			if (!test_bit(EV_REP, dev->evbit))
-				return -ENOSYS;
-			if (put_user(dev->rep[REP_DELAY], ip))
-				return -EFAULT;
-			if (put_user(dev->rep[REP_PERIOD], ip + 1))
-				return -EFAULT;
-			return 0;
+	case EVIOCGREP:
+		if (!test_bit(EV_REP, dev->evbit))
+			return -ENOSYS;
+		if (put_user(dev->rep[REP_DELAY], ip))
+			return -EFAULT;
+		if (put_user(dev->rep[REP_PERIOD], ip + 1))
+			return -EFAULT;
+		return 0;
 
-		case EVIOCSREP:
-			if (!test_bit(EV_REP, dev->evbit))
-				return -ENOSYS;
-			if (get_user(u, ip))
-				return -EFAULT;
-			if (get_user(v, ip + 1))
-				return -EFAULT;
+	case EVIOCSREP:
+		if (!test_bit(EV_REP, dev->evbit))
+			return -ENOSYS;
+		if (get_user(u, ip))
+			return -EFAULT;
+		if (get_user(v, ip + 1))
+			return -EFAULT;
 
-			input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u);
-			input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v);
+		input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u);
+		input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v);
 
-			return 0;
+		return 0;
 
-		case EVIOCGKEYCODE:
-			if (get_user(t, ip))
-				return -EFAULT;
+	case EVIOCGKEYCODE:
+		if (get_user(t, ip))
+			return -EFAULT;
 
-			error = dev->getkeycode(dev, t, &v);
-			if (error)
-				return error;
-
-			if (put_user(v, ip + 1))
-				return -EFAULT;
-
-			return 0;
-
-		case EVIOCSKEYCODE:
-			if (get_user(t, ip) || get_user(v, ip + 1))
-				return -EFAULT;
-
-			return dev->setkeycode(dev, t, v);
-
-		case EVIOCSFF:
-			if (copy_from_user(&effect, p, sizeof(effect)))
-				return -EFAULT;
-
-			error = input_ff_upload(dev, &effect, file);
-
-			if (put_user(effect.id, &(((struct ff_effect __user *)p)->id)))
-				return -EFAULT;
-
+		error = dev->getkeycode(dev, t, &v);
+		if (error)
 			return error;
 
-		case EVIOCRMFF:
-			return input_ff_erase(dev, (int)(unsigned long) p, file);
+		if (put_user(v, ip + 1))
+			return -EFAULT;
 
-		case EVIOCGEFFECTS:
-			i = test_bit(EV_FF, dev->evbit) ? dev->ff->max_effects : 0;
-			if (put_user(i, ip))
-				return -EFAULT;
-			return 0;
+		return 0;
 
-		case EVIOCGRAB:
-			if (p) {
-				if (evdev->grab)
-					return -EBUSY;
-				if (input_grab_device(&evdev->handle))
-					return -EBUSY;
-				evdev->grab = client;
-				return 0;
-			} else {
-				if (evdev->grab != client)
-					return -EINVAL;
-				input_release_device(&evdev->handle);
-				evdev->grab = NULL;
+	case EVIOCSKEYCODE:
+		if (get_user(t, ip) || get_user(v, ip + 1))
+			return -EFAULT;
+
+		return dev->setkeycode(dev, t, v);
+
+	case EVIOCSFF:
+		if (copy_from_user(&effect, p, sizeof(effect)))
+			return -EFAULT;
+
+		error = input_ff_upload(dev, &effect, file);
+
+		if (put_user(effect.id, &(((struct ff_effect __user *)p)->id)))
+			return -EFAULT;
+
+		return error;
+
+	case EVIOCRMFF:
+		return input_ff_erase(dev, (int)(unsigned long) p, file);
+
+	case EVIOCGEFFECTS:
+		i = test_bit(EV_FF, dev->evbit) ?
+				dev->ff->max_effects : 0;
+		if (put_user(i, ip))
+			return -EFAULT;
+		return 0;
+
+	case EVIOCGRAB:
+		if (p)
+			return evdev_grab(evdev, client);
+		else
+			return evdev_ungrab(evdev, client);
+
+	default:
+
+		if (_IOC_TYPE(cmd) != 'E')
+			return -EINVAL;
+
+		if (_IOC_DIR(cmd) == _IOC_READ) {
+
+			if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0, 0))) {
+
+				unsigned long *bits;
+				int len;
+
+				switch (_IOC_NR(cmd) & EV_MAX) {
+
+				case      0: bits = dev->evbit;  len = EV_MAX;  break;
+				case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
+				case EV_REL: bits = dev->relbit; len = REL_MAX; break;
+				case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
+				case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
+				case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
+				case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
+				case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
+				case EV_SW:  bits = dev->swbit;  len = SW_MAX;  break;
+				default: return -EINVAL;
+			}
+				return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode);
+			}
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0)))
+				return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd),
+						    p, compat_mode);
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0)))
+				return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd),
+						    p, compat_mode);
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
+				return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd),
+						    p, compat_mode);
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0)))
+				return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd),
+						    p, compat_mode);
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0)))
+				return str_to_user(dev->name, _IOC_SIZE(cmd), p);
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0)))
+				return str_to_user(dev->phys, _IOC_SIZE(cmd), p);
+
+			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0)))
+				return str_to_user(dev->uniq, _IOC_SIZE(cmd), p);
+
+			if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+
+				t = _IOC_NR(cmd) & ABS_MAX;
+
+				abs.value = dev->abs[t];
+				abs.minimum = dev->absmin[t];
+				abs.maximum = dev->absmax[t];
+				abs.fuzz = dev->absfuzz[t];
+				abs.flat = dev->absflat[t];
+
+				if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+					return -EFAULT;
+
 				return 0;
 			}
 
-		default:
+		}
 
-			if (_IOC_TYPE(cmd) != 'E')
-				return -EINVAL;
+		if (_IOC_DIR(cmd) == _IOC_WRITE) {
 
-			if (_IOC_DIR(cmd) == _IOC_READ) {
+			if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
 
-				if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
+				t = _IOC_NR(cmd) & ABS_MAX;
 
-					unsigned long *bits;
-					int len;
+				if (copy_from_user(&abs, p,
+						sizeof(struct input_absinfo)))
+					return -EFAULT;
 
-					switch (_IOC_NR(cmd) & EV_MAX) {
-						case      0: bits = dev->evbit;  len = EV_MAX;  break;
-						case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
-						case EV_REL: bits = dev->relbit; len = REL_MAX; break;
-						case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
-						case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
-						case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
-						case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
-						case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
-						case EV_SW:  bits = dev->swbit;  len = SW_MAX;  break;
-						default: return -EINVAL;
-					}
-					return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode);
-				}
+				/*
+				 * Take event lock to ensure that we are not
+				 * changing device parameters in the middle
+				 * of event.
+				 */
+				spin_lock_irq(&dev->event_lock);
 
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0)))
-					return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd),
-							    p, compat_mode);
+				dev->abs[t] = abs.value;
+				dev->absmin[t] = abs.minimum;
+				dev->absmax[t] = abs.maximum;
+				dev->absfuzz[t] = abs.fuzz;
+				dev->absflat[t] = abs.flat;
 
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0)))
-					return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd),
-							    p, compat_mode);
+				spin_unlock_irq(&dev->event_lock);
 
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
-					return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd),
-							    p, compat_mode);
-
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0)))
-					return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd),
-							    p, compat_mode);
-
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0)))
-					return str_to_user(dev->name, _IOC_SIZE(cmd), p);
-
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0)))
-					return str_to_user(dev->phys, _IOC_SIZE(cmd), p);
-
-				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0)))
-					return str_to_user(dev->uniq, _IOC_SIZE(cmd), p);
-
-				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
-
-					t = _IOC_NR(cmd) & ABS_MAX;
-
-					abs.value = dev->abs[t];
-					abs.minimum = dev->absmin[t];
-					abs.maximum = dev->absmax[t];
-					abs.fuzz = dev->absfuzz[t];
-					abs.flat = dev->absflat[t];
-
-					if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
-						return -EFAULT;
-
-					return 0;
-				}
-
+				return 0;
 			}
-
-			if (_IOC_DIR(cmd) == _IOC_WRITE) {
-
-				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
-
-					t = _IOC_NR(cmd) & ABS_MAX;
-
-					if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
-						return -EFAULT;
-
-					dev->abs[t] = abs.value;
-					dev->absmin[t] = abs.minimum;
-					dev->absmax[t] = abs.maximum;
-					dev->absfuzz[t] = abs.fuzz;
-					dev->absflat[t] = abs.flat;
-
-					return 0;
-				}
-			}
+		}
 	}
 	return -EINVAL;
 }
 
+static long evdev_ioctl_handler(struct file *file, unsigned int cmd,
+				void __user *p, int compat_mode)
+{
+	struct evdev_client *client = file->private_data;
+	struct evdev *evdev = client->evdev;
+	int retval;
+
+	retval = mutex_lock_interruptible(&evdev->mutex);
+	if (retval)
+		return retval;
+
+	if (!evdev->exist) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	retval = evdev_do_ioctl(file, cmd, p, compat_mode);
+
+ out:
+	mutex_unlock(&evdev->mutex);
+	return retval;
+}
+
 static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	return evdev_ioctl_handler(file, cmd, (void __user *)arg, 0);
 }
 
 #ifdef CONFIG_COMPAT
-static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
+static long evdev_ioctl_compat(struct file *file,
+				unsigned int cmd, unsigned long arg)
 {
 	return evdev_ioctl_handler(file, cmd, compat_ptr(arg), 1);
 }
 #endif
 
 static const struct file_operations evdev_fops = {
-	.owner =	THIS_MODULE,
-	.read =		evdev_read,
-	.write =	evdev_write,
-	.poll =		evdev_poll,
-	.open =		evdev_open,
-	.release =	evdev_release,
-	.unlocked_ioctl = evdev_ioctl,
+	.owner		= THIS_MODULE,
+	.read		= evdev_read,
+	.write		= evdev_write,
+	.poll		= evdev_poll,
+	.open		= evdev_open,
+	.release	= evdev_release,
+	.unlocked_ioctl	= evdev_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl =	evdev_ioctl_compat,
+	.compat_ioctl	= evdev_ioctl_compat,
 #endif
-	.fasync =	evdev_fasync,
-	.flush =	evdev_flush
+	.fasync		= evdev_fasync,
+	.flush		= evdev_flush
 };
 
+static int evdev_install_chrdev(struct evdev *evdev)
+{
+	/*
+	 * No need to do any locking here as calls to connect and
+	 * disconnect are serialized by the input core
+	 */
+	evdev_table[evdev->minor] = evdev;
+	return 0;
+}
+
+static void evdev_remove_chrdev(struct evdev *evdev)
+{
+	/*
+	 * Lock evdev table to prevent race with evdev_open()
+	 */
+	mutex_lock(&evdev_table_mutex);
+	evdev_table[evdev->minor] = NULL;
+	mutex_unlock(&evdev_table_mutex);
+}
+
+/*
+ * Mark device non-existent. This disables writes, ioctls and
+ * prevents new users from opening the device. Already posted
+ * blocking reads will stay, however new ones will fail.
+ */
+static void evdev_mark_dead(struct evdev *evdev)
+{
+	mutex_lock(&evdev->mutex);
+	evdev->exist = 0;
+	mutex_unlock(&evdev->mutex);
+}
+
+static void evdev_cleanup(struct evdev *evdev)
+{
+	struct input_handle *handle = &evdev->handle;
+
+	evdev_mark_dead(evdev);
+	evdev_hangup(evdev);
+	evdev_remove_chrdev(evdev);
+
+	/* evdev is marked dead so no one else accesses evdev->open */
+	if (evdev->open) {
+		input_flush_device(handle, NULL);
+		input_close_device(handle);
+	}
+}
+
+/*
+ * Create new evdev device. Note that input core serializes calls
+ * to connect and disconnect so we don't need to lock evdev_table here.
+ */
 static int evdev_connect(struct input_handler *handler, struct input_dev *dev,
 			 const struct input_device_id *id)
 {
@@ -637,7 +871,10 @@
 	int minor;
 	int error;
 
-	for (minor = 0; minor < EVDEV_MINORS && evdev_table[minor]; minor++);
+	for (minor = 0; minor < EVDEV_MINORS; minor++)
+		if (!evdev_table[minor])
+			break;
+
 	if (minor == EVDEV_MINORS) {
 		printk(KERN_ERR "evdev: no more free evdev devices\n");
 		return -ENFILE;
@@ -648,38 +885,44 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&evdev->client_list);
+	spin_lock_init(&evdev->client_lock);
+	mutex_init(&evdev->mutex);
 	init_waitqueue_head(&evdev->wait);
 
+	snprintf(evdev->name, sizeof(evdev->name), "event%d", minor);
 	evdev->exist = 1;
 	evdev->minor = minor;
+
 	evdev->handle.dev = dev;
 	evdev->handle.name = evdev->name;
 	evdev->handle.handler = handler;
 	evdev->handle.private = evdev;
-	snprintf(evdev->name, sizeof(evdev->name), "event%d", minor);
 
-	snprintf(evdev->dev.bus_id, sizeof(evdev->dev.bus_id),
-		 "event%d", minor);
+	strlcpy(evdev->dev.bus_id, evdev->name, sizeof(evdev->dev.bus_id));
+	evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor);
 	evdev->dev.class = &input_class;
 	evdev->dev.parent = &dev->dev;
-	evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor);
 	evdev->dev.release = evdev_free;
 	device_initialize(&evdev->dev);
 
-	evdev_table[minor] = evdev;
-
-	error = device_add(&evdev->dev);
+	error = input_register_handle(&evdev->handle);
 	if (error)
 		goto err_free_evdev;
 
-	error = input_register_handle(&evdev->handle);
+	error = evdev_install_chrdev(evdev);
 	if (error)
-		goto err_delete_evdev;
+		goto err_unregister_handle;
+
+	error = device_add(&evdev->dev);
+	if (error)
+		goto err_cleanup_evdev;
 
 	return 0;
 
- err_delete_evdev:
-	device_del(&evdev->dev);
+ err_cleanup_evdev:
+	evdev_cleanup(evdev);
+ err_unregister_handle:
+	input_unregister_handle(&evdev->handle);
  err_free_evdev:
 	put_device(&evdev->dev);
 	return error;
@@ -688,21 +931,10 @@
 static void evdev_disconnect(struct input_handle *handle)
 {
 	struct evdev *evdev = handle->private;
-	struct evdev_client *client;
 
-	input_unregister_handle(handle);
 	device_del(&evdev->dev);
-
-	evdev->exist = 0;
-
-	if (evdev->open) {
-		input_flush_device(handle, NULL);
-		input_close_device(handle);
-		list_for_each_entry(client, &evdev->client_list, node)
-			kill_fasync(&client->fasync, SIGIO, POLL_HUP);
-		wake_up_interruptible(&evdev->wait);
-	}
-
+	evdev_cleanup(evdev);
+	input_unregister_handle(handle);
 	put_device(&evdev->dev);
 }
 
@@ -714,13 +946,13 @@
 MODULE_DEVICE_TABLE(input, evdev_ids);
 
 static struct input_handler evdev_handler = {
-	.event =	evdev_event,
-	.connect =	evdev_connect,
-	.disconnect =	evdev_disconnect,
-	.fops =		&evdev_fops,
-	.minor =	EVDEV_MINOR_BASE,
-	.name =		"evdev",
-	.id_table =	evdev_ids,
+	.event		= evdev_event,
+	.connect	= evdev_connect,
+	.disconnect	= evdev_disconnect,
+	.fops		= &evdev_fops,
+	.minor		= EVDEV_MINOR_BASE,
+	.name		= "evdev",
+	.id_table	= evdev_ids,
 };
 
 static int __init evdev_init(void)
diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index b773d4c..92b3598 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -70,6 +70,7 @@
 {
 	struct input_polled_dev *dev = input->private;
 	int error;
+	unsigned long ticks;
 
 	error = input_polldev_start_workqueue();
 	if (error)
@@ -78,8 +79,10 @@
 	if (dev->flush)
 		dev->flush(dev);
 
-	queue_delayed_work(polldev_wq, &dev->work,
-			   msecs_to_jiffies(dev->poll_interval));
+	ticks = msecs_to_jiffies(dev->poll_interval);
+	if (ticks >= HZ)
+		ticks = round_jiffies(ticks);
+	queue_delayed_work(polldev_wq, &dev->work, ticks);
 
 	return 0;
 }
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 5dc361c..2f2b020 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -17,10 +17,10 @@
 #include <linux/major.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
 MODULE_DESCRIPTION("Input core");
@@ -31,8 +31,222 @@
 static LIST_HEAD(input_dev_list);
 static LIST_HEAD(input_handler_list);
 
+/*
+ * input_mutex protects access to both input_dev_list and input_handler_list.
+ * This also causes input_[un]register_device and input_[un]register_handler
+ * be mutually exclusive which simplifies locking in drivers implementing
+ * input handlers.
+ */
+static DEFINE_MUTEX(input_mutex);
+
 static struct input_handler *input_table[8];
 
+static inline int is_event_supported(unsigned int code,
+				     unsigned long *bm, unsigned int max)
+{
+	return code <= max && test_bit(code, bm);
+}
+
+static int input_defuzz_abs_event(int value, int old_val, int fuzz)
+{
+	if (fuzz) {
+		if (value > old_val - fuzz / 2 && value < old_val + fuzz / 2)
+			return old_val;
+
+		if (value > old_val - fuzz && value < old_val + fuzz)
+			return (old_val * 3 + value) / 4;
+
+		if (value > old_val - fuzz * 2 && value < old_val + fuzz * 2)
+			return (old_val + value) / 2;
+	}
+
+	return value;
+}
+
+/*
+ * Pass event through all open handles. This function is called with
+ * dev->event_lock held and interrupts disabled.
+ */
+static void input_pass_event(struct input_dev *dev,
+			     unsigned int type, unsigned int code, int value)
+{
+	struct input_handle *handle;
+
+	rcu_read_lock();
+
+	handle = rcu_dereference(dev->grab);
+	if (handle)
+		handle->handler->event(handle, type, code, value);
+	else
+		list_for_each_entry_rcu(handle, &dev->h_list, d_node)
+			if (handle->open)
+				handle->handler->event(handle,
+							type, code, value);
+	rcu_read_unlock();
+}
+
+/*
+ * Generate software autorepeat event. Note that we take
+ * dev->event_lock here to avoid racing with input_event
+ * which may cause keys get "stuck".
+ */
+static void input_repeat_key(unsigned long data)
+{
+	struct input_dev *dev = (void *) data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+
+	if (test_bit(dev->repeat_key, dev->key) &&
+	    is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) {
+
+		input_pass_event(dev, EV_KEY, dev->repeat_key, 2);
+
+		if (dev->sync) {
+			/*
+			 * Only send SYN_REPORT if we are not in a middle
+			 * of driver parsing a new hardware packet.
+			 * Otherwise assume that the driver will send
+			 * SYN_REPORT once it's done.
+			 */
+			input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
+		}
+
+		if (dev->rep[REP_PERIOD])
+			mod_timer(&dev->timer, jiffies +
+					msecs_to_jiffies(dev->rep[REP_PERIOD]));
+	}
+
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+static void input_start_autorepeat(struct input_dev *dev, int code)
+{
+	if (test_bit(EV_REP, dev->evbit) &&
+	    dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] &&
+	    dev->timer.data) {
+		dev->repeat_key = code;
+		mod_timer(&dev->timer,
+			  jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]));
+	}
+}
+
+#define INPUT_IGNORE_EVENT	0
+#define INPUT_PASS_TO_HANDLERS	1
+#define INPUT_PASS_TO_DEVICE	2
+#define INPUT_PASS_TO_ALL	(INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE)
+
+static void input_handle_event(struct input_dev *dev,
+			       unsigned int type, unsigned int code, int value)
+{
+	int disposition = INPUT_IGNORE_EVENT;
+
+	switch (type) {
+
+	case EV_SYN:
+		switch (code) {
+		case SYN_CONFIG:
+			disposition = INPUT_PASS_TO_ALL;
+			break;
+
+		case SYN_REPORT:
+			if (!dev->sync) {
+				dev->sync = 1;
+				disposition = INPUT_PASS_TO_HANDLERS;
+			}
+			break;
+		}
+		break;
+
+	case EV_KEY:
+		if (is_event_supported(code, dev->keybit, KEY_MAX) &&
+		    !!test_bit(code, dev->key) != value) {
+
+			if (value != 2) {
+				__change_bit(code, dev->key);
+				if (value)
+					input_start_autorepeat(dev, code);
+			}
+
+			disposition = INPUT_PASS_TO_HANDLERS;
+		}
+		break;
+
+	case EV_SW:
+		if (is_event_supported(code, dev->swbit, SW_MAX) &&
+		    !!test_bit(code, dev->sw) != value) {
+
+			__change_bit(code, dev->sw);
+			disposition = INPUT_PASS_TO_HANDLERS;
+		}
+		break;
+
+	case EV_ABS:
+		if (is_event_supported(code, dev->absbit, ABS_MAX)) {
+
+			value = input_defuzz_abs_event(value,
+					dev->abs[code], dev->absfuzz[code]);
+
+			if (dev->abs[code] != value) {
+				dev->abs[code] = value;
+				disposition = INPUT_PASS_TO_HANDLERS;
+			}
+		}
+		break;
+
+	case EV_REL:
+		if (is_event_supported(code, dev->relbit, REL_MAX) && value)
+			disposition = INPUT_PASS_TO_HANDLERS;
+
+		break;
+
+	case EV_MSC:
+		if (is_event_supported(code, dev->mscbit, MSC_MAX))
+			disposition = INPUT_PASS_TO_ALL;
+
+		break;
+
+	case EV_LED:
+		if (is_event_supported(code, dev->ledbit, LED_MAX) &&
+		    !!test_bit(code, dev->led) != value) {
+
+			__change_bit(code, dev->led);
+			disposition = INPUT_PASS_TO_ALL;
+		}
+		break;
+
+	case EV_SND:
+		if (is_event_supported(code, dev->sndbit, SND_MAX)) {
+
+			if (!!test_bit(code, dev->snd) != !!value)
+				__change_bit(code, dev->snd);
+			disposition = INPUT_PASS_TO_ALL;
+		}
+		break;
+
+	case EV_REP:
+		if (code <= REP_MAX && value >= 0 && dev->rep[code] != value) {
+			dev->rep[code] = value;
+			disposition = INPUT_PASS_TO_ALL;
+		}
+		break;
+
+	case EV_FF:
+		if (value >= 0)
+			disposition = INPUT_PASS_TO_ALL;
+		break;
+	}
+
+	if (type != EV_SYN)
+		dev->sync = 0;
+
+	if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event)
+		dev->event(dev, type, code, value);
+
+	if (disposition & INPUT_PASS_TO_HANDLERS)
+		input_pass_event(dev, type, code, value);
+}
+
 /**
  * input_event() - report new input event
  * @dev: device that generated the event
@@ -40,158 +254,22 @@
  * @code: event code
  * @value: value of the event
  *
- * This function should be used by drivers implementing various input devices
- * See also input_inject_event()
+ * This function should be used by drivers implementing various input
+ * devices. See also input_inject_event().
  */
-void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+
+void input_event(struct input_dev *dev,
+		 unsigned int type, unsigned int code, int value)
 {
-	struct input_handle *handle;
+	unsigned long flags;
 
-	if (type > EV_MAX || !test_bit(type, dev->evbit))
-		return;
+	if (is_event_supported(type, dev->evbit, EV_MAX)) {
 
-	add_input_randomness(type, code, value);
-
-	switch (type) {
-
-		case EV_SYN:
-			switch (code) {
-				case SYN_CONFIG:
-					if (dev->event)
-						dev->event(dev, type, code, value);
-					break;
-
-				case SYN_REPORT:
-					if (dev->sync)
-						return;
-					dev->sync = 1;
-					break;
-			}
-			break;
-
-		case EV_KEY:
-
-			if (code > KEY_MAX || !test_bit(code, dev->keybit) || !!test_bit(code, dev->key) == value)
-				return;
-
-			if (value == 2)
-				break;
-
-			change_bit(code, dev->key);
-
-			if (test_bit(EV_REP, dev->evbit) && dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && dev->timer.data && value) {
-				dev->repeat_key = code;
-				mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]));
-			}
-
-			break;
-
-		case EV_SW:
-
-			if (code > SW_MAX || !test_bit(code, dev->swbit) || !!test_bit(code, dev->sw) == value)
-				return;
-
-			change_bit(code, dev->sw);
-
-			break;
-
-		case EV_ABS:
-
-			if (code > ABS_MAX || !test_bit(code, dev->absbit))
-				return;
-
-			if (dev->absfuzz[code]) {
-				if ((value > dev->abs[code] - (dev->absfuzz[code] >> 1)) &&
-				    (value < dev->abs[code] + (dev->absfuzz[code] >> 1)))
-					return;
-
-				if ((value > dev->abs[code] - dev->absfuzz[code]) &&
-				    (value < dev->abs[code] + dev->absfuzz[code]))
-					value = (dev->abs[code] * 3 + value) >> 2;
-
-				if ((value > dev->abs[code] - (dev->absfuzz[code] << 1)) &&
-				    (value < dev->abs[code] + (dev->absfuzz[code] << 1)))
-					value = (dev->abs[code] + value) >> 1;
-			}
-
-			if (dev->abs[code] == value)
-				return;
-
-			dev->abs[code] = value;
-			break;
-
-		case EV_REL:
-
-			if (code > REL_MAX || !test_bit(code, dev->relbit) || (value == 0))
-				return;
-
-			break;
-
-		case EV_MSC:
-
-			if (code > MSC_MAX || !test_bit(code, dev->mscbit))
-				return;
-
-			if (dev->event)
-				dev->event(dev, type, code, value);
-
-			break;
-
-		case EV_LED:
-
-			if (code > LED_MAX || !test_bit(code, dev->ledbit) || !!test_bit(code, dev->led) == value)
-				return;
-
-			change_bit(code, dev->led);
-
-			if (dev->event)
-				dev->event(dev, type, code, value);
-
-			break;
-
-		case EV_SND:
-
-			if (code > SND_MAX || !test_bit(code, dev->sndbit))
-				return;
-
-			if (!!test_bit(code, dev->snd) != !!value)
-				change_bit(code, dev->snd);
-
-			if (dev->event)
-				dev->event(dev, type, code, value);
-
-			break;
-
-		case EV_REP:
-
-			if (code > REP_MAX || value < 0 || dev->rep[code] == value)
-				return;
-
-			dev->rep[code] = value;
-			if (dev->event)
-				dev->event(dev, type, code, value);
-
-			break;
-
-		case EV_FF:
-
-			if (value < 0)
-				return;
-
-			if (dev->event)
-				dev->event(dev, type, code, value);
-			break;
+		spin_lock_irqsave(&dev->event_lock, flags);
+		add_input_randomness(type, code, value);
+		input_handle_event(dev, type, code, value);
+		spin_unlock_irqrestore(&dev->event_lock, flags);
 	}
-
-	if (type != EV_SYN)
-		dev->sync = 0;
-
-	if (dev->grab)
-		dev->grab->handler->event(dev->grab, type, code, value);
-	else
-		list_for_each_entry(handle, &dev->h_list, d_node)
-			if (handle->open)
-				handle->handler->event(handle, type, code, value);
 }
 EXPORT_SYMBOL(input_event);
 
@@ -202,102 +280,228 @@
  * @code: event code
  * @value: value of the event
  *
- * Similar to input_event() but will ignore event if device is "grabbed" and handle
- * injecting event is not the one that owns the device.
+ * Similar to input_event() but will ignore event if device is
+ * "grabbed" and handle injecting event is not the one that owns
+ * the device.
  */
-void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+void input_inject_event(struct input_handle *handle,
+			unsigned int type, unsigned int code, int value)
 {
-	if (!handle->dev->grab || handle->dev->grab == handle)
-		input_event(handle->dev, type, code, value);
+	struct input_dev *dev = handle->dev;
+	struct input_handle *grab;
+	unsigned long flags;
+
+	if (is_event_supported(type, dev->evbit, EV_MAX)) {
+		spin_lock_irqsave(&dev->event_lock, flags);
+
+		rcu_read_lock();
+		grab = rcu_dereference(dev->grab);
+		if (!grab || grab == handle)
+			input_handle_event(dev, type, code, value);
+		rcu_read_unlock();
+
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
 }
 EXPORT_SYMBOL(input_inject_event);
 
-static void input_repeat_key(unsigned long data)
-{
-	struct input_dev *dev = (void *) data;
-
-	if (!test_bit(dev->repeat_key, dev->key))
-		return;
-
-	input_event(dev, EV_KEY, dev->repeat_key, 2);
-	input_sync(dev);
-
-	if (dev->rep[REP_PERIOD])
-		mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_PERIOD]));
-}
-
+/**
+ * input_grab_device - grabs device for exclusive use
+ * @handle: input handle that wants to own the device
+ *
+ * When a device is grabbed by an input handle all events generated by
+ * the device are delivered only to this handle. Also events injected
+ * by other input handles are ignored while device is grabbed.
+ */
 int input_grab_device(struct input_handle *handle)
 {
-	if (handle->dev->grab)
-		return -EBUSY;
+	struct input_dev *dev = handle->dev;
+	int retval;
 
-	handle->dev->grab = handle;
-	return 0;
+	retval = mutex_lock_interruptible(&dev->mutex);
+	if (retval)
+		return retval;
+
+	if (dev->grab) {
+		retval = -EBUSY;
+		goto out;
+	}
+
+	rcu_assign_pointer(dev->grab, handle);
+	synchronize_rcu();
+
+ out:
+	mutex_unlock(&dev->mutex);
+	return retval;
 }
 EXPORT_SYMBOL(input_grab_device);
 
-void input_release_device(struct input_handle *handle)
+static void __input_release_device(struct input_handle *handle)
 {
 	struct input_dev *dev = handle->dev;
 
 	if (dev->grab == handle) {
-		dev->grab = NULL;
+		rcu_assign_pointer(dev->grab, NULL);
+		/* Make sure input_pass_event() notices that grab is gone */
+		synchronize_rcu();
 
 		list_for_each_entry(handle, &dev->h_list, d_node)
-			if (handle->handler->start)
+			if (handle->open && handle->handler->start)
 				handle->handler->start(handle);
 	}
 }
+
+/**
+ * input_release_device - release previously grabbed device
+ * @handle: input handle that owns the device
+ *
+ * Releases previously grabbed device so that other input handles can
+ * start receiving input events. Upon release all handlers attached
+ * to the device have their start() method called so they have a change
+ * to synchronize device state with the rest of the system.
+ */
+void input_release_device(struct input_handle *handle)
+{
+	struct input_dev *dev = handle->dev;
+
+	mutex_lock(&dev->mutex);
+	__input_release_device(handle);
+	mutex_unlock(&dev->mutex);
+}
 EXPORT_SYMBOL(input_release_device);
 
+/**
+ * input_open_device - open input device
+ * @handle: handle through which device is being accessed
+ *
+ * This function should be called by input handlers when they
+ * want to start receive events from given input device.
+ */
 int input_open_device(struct input_handle *handle)
 {
 	struct input_dev *dev = handle->dev;
-	int err;
+	int retval;
 
-	err = mutex_lock_interruptible(&dev->mutex);
-	if (err)
-		return err;
+	retval = mutex_lock_interruptible(&dev->mutex);
+	if (retval)
+		return retval;
+
+	if (dev->going_away) {
+		retval = -ENODEV;
+		goto out;
+	}
 
 	handle->open++;
 
 	if (!dev->users++ && dev->open)
-		err = dev->open(dev);
+		retval = dev->open(dev);
 
-	if (err)
-		handle->open--;
+	if (retval) {
+		dev->users--;
+		if (!--handle->open) {
+			/*
+			 * Make sure we are not delivering any more events
+			 * through this handle
+			 */
+			synchronize_rcu();
+		}
+	}
 
+ out:
 	mutex_unlock(&dev->mutex);
-
-	return err;
+	return retval;
 }
 EXPORT_SYMBOL(input_open_device);
 
-int input_flush_device(struct input_handle* handle, struct file* file)
+int input_flush_device(struct input_handle *handle, struct file *file)
 {
-	if (handle->dev->flush)
-		return handle->dev->flush(handle->dev, file);
+	struct input_dev *dev = handle->dev;
+	int retval;
 
-	return 0;
+	retval = mutex_lock_interruptible(&dev->mutex);
+	if (retval)
+		return retval;
+
+	if (dev->flush)
+		retval = dev->flush(dev, file);
+
+	mutex_unlock(&dev->mutex);
+	return retval;
 }
 EXPORT_SYMBOL(input_flush_device);
 
+/**
+ * input_close_device - close input device
+ * @handle: handle through which device is being accessed
+ *
+ * This function should be called by input handlers when they
+ * want to stop receive events from given input device.
+ */
 void input_close_device(struct input_handle *handle)
 {
 	struct input_dev *dev = handle->dev;
 
-	input_release_device(handle);
-
 	mutex_lock(&dev->mutex);
 
+	__input_release_device(handle);
+
 	if (!--dev->users && dev->close)
 		dev->close(dev);
-	handle->open--;
+
+	if (!--handle->open) {
+		/*
+		 * synchronize_rcu() makes sure that input_pass_event()
+		 * completed and that no more input events are delivered
+		 * through this handle
+		 */
+		synchronize_rcu();
+	}
 
 	mutex_unlock(&dev->mutex);
 }
 EXPORT_SYMBOL(input_close_device);
 
+/*
+ * Prepare device for unregistering
+ */
+static void input_disconnect_device(struct input_dev *dev)
+{
+	struct input_handle *handle;
+	int code;
+
+	/*
+	 * Mark device as going away. Note that we take dev->mutex here
+	 * not to protect access to dev->going_away but rather to ensure
+	 * that there are no threads in the middle of input_open_device()
+	 */
+	mutex_lock(&dev->mutex);
+	dev->going_away = 1;
+	mutex_unlock(&dev->mutex);
+
+	spin_lock_irq(&dev->event_lock);
+
+	/*
+	 * Simulate keyup events for all pressed keys so that handlers
+	 * are not left with "stuck" keys. The driver may continue
+	 * generate events even after we done here but they will not
+	 * reach any handlers.
+	 */
+	if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) {
+		for (code = 0; code <= KEY_MAX; code++) {
+			if (is_event_supported(code, dev->keybit, KEY_MAX) &&
+			    test_bit(code, dev->key)) {
+				input_pass_event(dev, EV_KEY, code, 0);
+			}
+		}
+		input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
+	}
+
+	list_for_each_entry(handle, &dev->h_list, d_node)
+		handle->open = 0;
+
+	spin_unlock_irq(&dev->event_lock);
+}
+
 static int input_fetch_keycode(struct input_dev *dev, int scancode)
 {
 	switch (dev->keycodesize) {
@@ -473,7 +677,8 @@
 
 static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	/* acquire lock here ... Yes, we do need locking, I knowi, I know... */
+	if (mutex_lock_interruptible(&input_mutex))
+		return NULL;
 
 	return seq_list_start(&input_dev_list, *pos);
 }
@@ -485,7 +690,7 @@
 
 static void input_devices_seq_stop(struct seq_file *seq, void *v)
 {
-	/* release lock here */
+	mutex_unlock(&input_mutex);
 }
 
 static void input_seq_print_bitmap(struct seq_file *seq, const char *name,
@@ -569,7 +774,9 @@
 
 static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	/* acquire lock here ... Yes, we do need locking, I knowi, I know... */
+	if (mutex_lock_interruptible(&input_mutex))
+		return NULL;
+
 	seq->private = (void *)(unsigned long)*pos;
 	return seq_list_start(&input_handler_list, *pos);
 }
@@ -582,7 +789,7 @@
 
 static void input_handlers_seq_stop(struct seq_file *seq, void *v)
 {
-	/* release lock here */
+	mutex_unlock(&input_mutex);
 }
 
 static int input_handlers_seq_show(struct seq_file *seq, void *v)
@@ -983,6 +1190,7 @@
 		dev->dev.class = &input_class;
 		device_initialize(&dev->dev);
 		mutex_init(&dev->mutex);
+		spin_lock_init(&dev->event_lock);
 		INIT_LIST_HEAD(&dev->h_list);
 		INIT_LIST_HEAD(&dev->node);
 
@@ -1000,7 +1208,7 @@
  * This function should only be used if input_register_device()
  * was not called yet or if it failed. Once device was registered
  * use input_unregister_device() and memory will be freed once last
- * refrence to the device is dropped.
+ * reference to the device is dropped.
  *
  * Device should be allocated by input_allocate_device().
  *
@@ -1070,6 +1278,18 @@
 }
 EXPORT_SYMBOL(input_set_capability);
 
+/**
+ * input_register_device - register device with input core
+ * @dev: device to be registered
+ *
+ * This function registers device with input core. The device must be
+ * allocated with input_allocate_device() and all it's capabilities
+ * set up before registering.
+ * If function fails the device must be freed with input_free_device().
+ * Once device has been successfully registered it can be unregistered
+ * with input_unregister_device(); input_free_device() should not be
+ * called in this case.
+ */
 int input_register_device(struct input_dev *dev)
 {
 	static atomic_t input_no = ATOMIC_INIT(0);
@@ -1077,7 +1297,7 @@
 	const char *path;
 	int error;
 
-	set_bit(EV_SYN, dev->evbit);
+	__set_bit(EV_SYN, dev->evbit);
 
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
@@ -1098,8 +1318,6 @@
 	if (!dev->setkeycode)
 		dev->setkeycode = input_default_setkeycode;
 
-	list_add_tail(&dev->node, &input_dev_list);
-
 	snprintf(dev->dev.bus_id, sizeof(dev->dev.bus_id),
 		 "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1);
 
@@ -1115,49 +1333,79 @@
 		dev->name ? dev->name : "Unspecified device", path ? path : "N/A");
 	kfree(path);
 
+	error = mutex_lock_interruptible(&input_mutex);
+	if (error) {
+		device_del(&dev->dev);
+		return error;
+	}
+
+	list_add_tail(&dev->node, &input_dev_list);
+
 	list_for_each_entry(handler, &input_handler_list, node)
 		input_attach_handler(dev, handler);
 
 	input_wakeup_procfs_readers();
 
+	mutex_unlock(&input_mutex);
+
 	return 0;
 }
 EXPORT_SYMBOL(input_register_device);
 
+/**
+ * input_unregister_device - unregister previously registered device
+ * @dev: device to be unregistered
+ *
+ * This function unregisters an input device. Once device is unregistered
+ * the caller should not try to access it as it may get freed at any moment.
+ */
 void input_unregister_device(struct input_dev *dev)
 {
 	struct input_handle *handle, *next;
-	int code;
 
-	for (code = 0; code <= KEY_MAX; code++)
-		if (test_bit(code, dev->key))
-			input_report_key(dev, code, 0);
-	input_sync(dev);
+	input_disconnect_device(dev);
 
-	del_timer_sync(&dev->timer);
+	mutex_lock(&input_mutex);
 
 	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
 		handle->handler->disconnect(handle);
 	WARN_ON(!list_empty(&dev->h_list));
 
+	del_timer_sync(&dev->timer);
 	list_del_init(&dev->node);
 
-	device_unregister(&dev->dev);
-
 	input_wakeup_procfs_readers();
+
+	mutex_unlock(&input_mutex);
+
+	device_unregister(&dev->dev);
 }
 EXPORT_SYMBOL(input_unregister_device);
 
+/**
+ * input_register_handler - register a new input handler
+ * @handler: handler to be registered
+ *
+ * This function registers a new input handler (interface) for input
+ * devices in the system and attaches it to all input devices that
+ * are compatible with the handler.
+ */
 int input_register_handler(struct input_handler *handler)
 {
 	struct input_dev *dev;
+	int retval;
+
+	retval = mutex_lock_interruptible(&input_mutex);
+	if (retval)
+		return retval;
 
 	INIT_LIST_HEAD(&handler->h_list);
 
 	if (handler->fops != NULL) {
-		if (input_table[handler->minor >> 5])
-			return -EBUSY;
-
+		if (input_table[handler->minor >> 5]) {
+			retval = -EBUSY;
+			goto out;
+		}
 		input_table[handler->minor >> 5] = handler;
 	}
 
@@ -1167,14 +1415,26 @@
 		input_attach_handler(dev, handler);
 
 	input_wakeup_procfs_readers();
-	return 0;
+
+ out:
+	mutex_unlock(&input_mutex);
+	return retval;
 }
 EXPORT_SYMBOL(input_register_handler);
 
+/**
+ * input_unregister_handler - unregisters an input handler
+ * @handler: handler to be unregistered
+ *
+ * This function disconnects a handler from its input devices and
+ * removes it from lists of known handlers.
+ */
 void input_unregister_handler(struct input_handler *handler)
 {
 	struct input_handle *handle, *next;
 
+	mutex_lock(&input_mutex);
+
 	list_for_each_entry_safe(handle, next, &handler->h_list, h_node)
 		handler->disconnect(handle);
 	WARN_ON(!list_empty(&handler->h_list));
@@ -1185,14 +1445,45 @@
 		input_table[handler->minor >> 5] = NULL;
 
 	input_wakeup_procfs_readers();
+
+	mutex_unlock(&input_mutex);
 }
 EXPORT_SYMBOL(input_unregister_handler);
 
+/**
+ * input_register_handle - register a new input handle
+ * @handle: handle to register
+ *
+ * This function puts a new input handle onto device's
+ * and handler's lists so that events can flow through
+ * it once it is opened using input_open_device().
+ *
+ * This function is supposed to be called from handler's
+ * connect() method.
+ */
 int input_register_handle(struct input_handle *handle)
 {
 	struct input_handler *handler = handle->handler;
+	struct input_dev *dev = handle->dev;
+	int error;
 
-	list_add_tail(&handle->d_node, &handle->dev->h_list);
+	/*
+	 * We take dev->mutex here to prevent race with
+	 * input_release_device().
+	 */
+	error = mutex_lock_interruptible(&dev->mutex);
+	if (error)
+		return error;
+	list_add_tail_rcu(&handle->d_node, &dev->h_list);
+	mutex_unlock(&dev->mutex);
+	synchronize_rcu();
+
+	/*
+	 * Since we are supposed to be called from ->connect()
+	 * which is mutually exclusive with ->disconnect()
+	 * we can't be racing with input_unregister_handle()
+	 * and so separate lock is not needed here.
+	 */
 	list_add_tail(&handle->h_node, &handler->h_list);
 
 	if (handler->start)
@@ -1202,10 +1493,29 @@
 }
 EXPORT_SYMBOL(input_register_handle);
 
+/**
+ * input_unregister_handle - unregister an input handle
+ * @handle: handle to unregister
+ *
+ * This function removes input handle from device's
+ * and handler's lists.
+ *
+ * This function is supposed to be called from handler's
+ * disconnect() method.
+ */
 void input_unregister_handle(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+
 	list_del_init(&handle->h_node);
-	list_del_init(&handle->d_node);
+
+	/*
+	 * Take dev->mutex to prevent race with input_release_device().
+	 */
+	mutex_lock(&dev->mutex);
+	list_del_rcu(&handle->d_node);
+	mutex_unlock(&dev->mutex);
+	synchronize_rcu();
 }
 EXPORT_SYMBOL(input_unregister_handle);
 
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index a9a0180..2b201f9 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -43,6 +43,8 @@
 	struct input_handle handle;
 	wait_queue_head_t wait;
 	struct list_head client_list;
+	spinlock_t client_lock; /* protects client_list */
+	struct mutex mutex;
 	struct device dev;
 
 	struct js_corr corr[ABS_MAX + 1];
@@ -61,31 +63,61 @@
 	int head;
 	int tail;
 	int startup;
+	spinlock_t buffer_lock; /* protects access to buffer, head and tail */
 	struct fasync_struct *fasync;
 	struct joydev *joydev;
 	struct list_head node;
 };
 
 static struct joydev *joydev_table[JOYDEV_MINORS];
+static DEFINE_MUTEX(joydev_table_mutex);
 
 static int joydev_correct(int value, struct js_corr *corr)
 {
 	switch (corr->type) {
-		case JS_CORR_NONE:
-			break;
-		case JS_CORR_BROKEN:
-			value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 :
-				((corr->coef[3] * (value - corr->coef[1])) >> 14)) :
-				((corr->coef[2] * (value - corr->coef[0])) >> 14);
-			break;
-		default:
-			return 0;
+
+	case JS_CORR_NONE:
+		break;
+
+	case JS_CORR_BROKEN:
+		value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 :
+			((corr->coef[3] * (value - corr->coef[1])) >> 14)) :
+			((corr->coef[2] * (value - corr->coef[0])) >> 14);
+		break;
+
+	default:
+		return 0;
 	}
 
 	return value < -32767 ? -32767 : (value > 32767 ? 32767 : value);
 }
 
-static void joydev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+static void joydev_pass_event(struct joydev_client *client,
+			      struct js_event *event)
+{
+	struct joydev *joydev = client->joydev;
+
+	/*
+	 * IRQs already disabled, just acquire the lock
+	 */
+	spin_lock(&client->buffer_lock);
+
+	client->buffer[client->head] = *event;
+
+	if (client->startup == joydev->nabs + joydev->nkey) {
+		client->head++;
+		client->head &= JOYDEV_BUFFER_SIZE - 1;
+		if (client->tail == client->head)
+			client->startup = 0;
+	}
+
+	spin_unlock(&client->buffer_lock);
+
+	kill_fasync(&client->fasync, SIGIO, POLL_IN);
+}
+
+static void joydev_event(struct input_handle *handle,
+			 unsigned int type, unsigned int code, int value)
 {
 	struct joydev *joydev = handle->private;
 	struct joydev_client *client;
@@ -93,39 +125,34 @@
 
 	switch (type) {
 
-		case EV_KEY:
-			if (code < BTN_MISC || value == 2)
-				return;
-			event.type = JS_EVENT_BUTTON;
-			event.number = joydev->keymap[code - BTN_MISC];
-			event.value = value;
-			break;
-
-		case EV_ABS:
-			event.type = JS_EVENT_AXIS;
-			event.number = joydev->absmap[code];
-			event.value = joydev_correct(value, joydev->corr + event.number);
-			if (event.value == joydev->abs[event.number])
-				return;
-			joydev->abs[event.number] = event.value;
-			break;
-
-		default:
+	case EV_KEY:
+		if (code < BTN_MISC || value == 2)
 			return;
+		event.type = JS_EVENT_BUTTON;
+		event.number = joydev->keymap[code - BTN_MISC];
+		event.value = value;
+		break;
+
+	case EV_ABS:
+		event.type = JS_EVENT_AXIS;
+		event.number = joydev->absmap[code];
+		event.value = joydev_correct(value,
+					&joydev->corr[event.number]);
+		if (event.value == joydev->abs[event.number])
+			return;
+		joydev->abs[event.number] = event.value;
+		break;
+
+	default:
+		return;
 	}
 
 	event.time = jiffies_to_msecs(jiffies);
 
-	list_for_each_entry(client, &joydev->client_list, node) {
-
-		memcpy(client->buffer + client->head, &event, sizeof(struct js_event));
-
-		if (client->startup == joydev->nabs + joydev->nkey)
-			if (client->tail == (client->head = (client->head + 1) & (JOYDEV_BUFFER_SIZE - 1)))
-				client->startup = 0;
-
-		kill_fasync(&client->fasync, SIGIO, POLL_IN);
-	}
+	rcu_read_lock();
+	list_for_each_entry_rcu(client, &joydev->client_list, node)
+		joydev_pass_event(client, &event);
+	rcu_read_unlock();
 
 	wake_up_interruptible(&joydev->wait);
 }
@@ -144,23 +171,83 @@
 {
 	struct joydev *joydev = container_of(dev, struct joydev, dev);
 
-	joydev_table[joydev->minor] = NULL;
 	kfree(joydev);
 }
 
+static void joydev_attach_client(struct joydev *joydev,
+				 struct joydev_client *client)
+{
+	spin_lock(&joydev->client_lock);
+	list_add_tail_rcu(&client->node, &joydev->client_list);
+	spin_unlock(&joydev->client_lock);
+	synchronize_rcu();
+}
+
+static void joydev_detach_client(struct joydev *joydev,
+				 struct joydev_client *client)
+{
+	spin_lock(&joydev->client_lock);
+	list_del_rcu(&client->node);
+	spin_unlock(&joydev->client_lock);
+	synchronize_rcu();
+}
+
+static int joydev_open_device(struct joydev *joydev)
+{
+	int retval;
+
+	retval = mutex_lock_interruptible(&joydev->mutex);
+	if (retval)
+		return retval;
+
+	if (!joydev->exist)
+		retval = -ENODEV;
+	else if (!joydev->open++) {
+		retval = input_open_device(&joydev->handle);
+		if (retval)
+			joydev->open--;
+	}
+
+	mutex_unlock(&joydev->mutex);
+	return retval;
+}
+
+static void joydev_close_device(struct joydev *joydev)
+{
+	mutex_lock(&joydev->mutex);
+
+	if (joydev->exist && !--joydev->open)
+		input_close_device(&joydev->handle);
+
+	mutex_unlock(&joydev->mutex);
+}
+
+/*
+ * Wake up users waiting for IO so they can disconnect from
+ * dead device.
+ */
+static void joydev_hangup(struct joydev *joydev)
+{
+	struct joydev_client *client;
+
+	spin_lock(&joydev->client_lock);
+	list_for_each_entry(client, &joydev->client_list, node)
+		kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+	spin_unlock(&joydev->client_lock);
+
+	wake_up_interruptible(&joydev->wait);
+}
+
 static int joydev_release(struct inode *inode, struct file *file)
 {
 	struct joydev_client *client = file->private_data;
 	struct joydev *joydev = client->joydev;
 
 	joydev_fasync(-1, file, 0);
-
-	list_del(&client->node);
+	joydev_detach_client(joydev, client);
 	kfree(client);
 
-	if (!--joydev->open && joydev->exist)
-		input_close_device(&joydev->handle);
-
+	joydev_close_device(joydev);
 	put_device(&joydev->dev);
 
 	return 0;
@@ -176,11 +263,16 @@
 	if (i >= JOYDEV_MINORS)
 		return -ENODEV;
 
+	error = mutex_lock_interruptible(&joydev_table_mutex);
+	if (error)
+		return error;
 	joydev = joydev_table[i];
-	if (!joydev || !joydev->exist)
-		return -ENODEV;
+	if (joydev)
+		get_device(&joydev->dev);
+	mutex_unlock(&joydev_table_mutex);
 
-	get_device(&joydev->dev);
+	if (!joydev)
+		return -ENODEV;
 
 	client = kzalloc(sizeof(struct joydev_client), GFP_KERNEL);
 	if (!client) {
@@ -188,37 +280,129 @@
 		goto err_put_joydev;
 	}
 
+	spin_lock_init(&client->buffer_lock);
 	client->joydev = joydev;
-	list_add_tail(&client->node, &joydev->client_list);
+	joydev_attach_client(joydev, client);
 
-	if (!joydev->open++ && joydev->exist) {
-		error = input_open_device(&joydev->handle);
-		if (error)
-			goto err_free_client;
-	}
+	error = joydev_open_device(joydev);
+	if (error)
+		goto err_free_client;
 
 	file->private_data = client;
 	return 0;
 
  err_free_client:
-	list_del(&client->node);
+	joydev_detach_client(joydev, client);
 	kfree(client);
  err_put_joydev:
 	put_device(&joydev->dev);
 	return error;
 }
 
-static ssize_t joydev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+static int joydev_generate_startup_event(struct joydev_client *client,
+					 struct input_dev *input,
+					 struct js_event *event)
 {
-	return -EINVAL;
+	struct joydev *joydev = client->joydev;
+	int have_event;
+
+	spin_lock_irq(&client->buffer_lock);
+
+	have_event = client->startup < joydev->nabs + joydev->nkey;
+
+	if (have_event) {
+
+		event->time = jiffies_to_msecs(jiffies);
+		if (client->startup < joydev->nkey) {
+			event->type = JS_EVENT_BUTTON | JS_EVENT_INIT;
+			event->number = client->startup;
+			event->value = !!test_bit(joydev->keypam[event->number],
+						  input->key);
+		} else {
+			event->type = JS_EVENT_AXIS | JS_EVENT_INIT;
+			event->number = client->startup - joydev->nkey;
+			event->value = joydev->abs[event->number];
+		}
+		client->startup++;
+	}
+
+	spin_unlock_irq(&client->buffer_lock);
+
+	return have_event;
 }
 
-static ssize_t joydev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static int joydev_fetch_next_event(struct joydev_client *client,
+				   struct js_event *event)
+{
+	int have_event;
+
+	spin_lock_irq(&client->buffer_lock);
+
+	have_event = client->head != client->tail;
+	if (have_event) {
+		*event = client->buffer[client->tail++];
+		client->tail &= JOYDEV_BUFFER_SIZE - 1;
+	}
+
+	spin_unlock_irq(&client->buffer_lock);
+
+	return have_event;
+}
+
+/*
+ * Old joystick interface
+ */
+static ssize_t joydev_0x_read(struct joydev_client *client,
+			      struct input_dev *input,
+			      char __user *buf)
+{
+	struct joydev *joydev = client->joydev;
+	struct JS_DATA_TYPE data;
+	int i;
+
+	spin_lock_irq(&input->event_lock);
+
+	/*
+	 * Get device state
+	 */
+	for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++)
+		data.buttons |=
+			test_bit(joydev->keypam[i], input->key) ? (1 << i) : 0;
+	data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x;
+	data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y;
+
+	/*
+	 * Reset reader's event queue
+	 */
+	spin_lock(&client->buffer_lock);
+	client->startup = 0;
+	client->tail = client->head;
+	spin_unlock(&client->buffer_lock);
+
+	spin_unlock_irq(&input->event_lock);
+
+	if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE)))
+		return -EFAULT;
+
+	return sizeof(struct JS_DATA_TYPE);
+}
+
+static inline int joydev_data_pending(struct joydev_client *client)
+{
+	struct joydev *joydev = client->joydev;
+
+	return client->startup < joydev->nabs + joydev->nkey ||
+		client->head != client->tail;
+}
+
+static ssize_t joydev_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
 {
 	struct joydev_client *client = file->private_data;
 	struct joydev *joydev = client->joydev;
 	struct input_dev *input = joydev->handle.dev;
-	int retval = 0;
+	struct js_event event;
+	int retval;
 
 	if (!joydev->exist)
 		return -ENODEV;
@@ -226,68 +410,35 @@
 	if (count < sizeof(struct js_event))
 		return -EINVAL;
 
-	if (count == sizeof(struct JS_DATA_TYPE)) {
+	if (count == sizeof(struct JS_DATA_TYPE))
+		return joydev_0x_read(client, input, buf);
 
-		struct JS_DATA_TYPE data;
-		int i;
-
-		for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++)
-			data.buttons |= test_bit(joydev->keypam[i], input->key) ? (1 << i) : 0;
-		data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x;
-		data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y;
-
-		if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE)))
-			return -EFAULT;
-
-		client->startup = 0;
-		client->tail = client->head;
-
-		return sizeof(struct JS_DATA_TYPE);
-	}
-
-	if (client->startup == joydev->nabs + joydev->nkey &&
-	    client->head == client->tail && (file->f_flags & O_NONBLOCK))
+	if (!joydev_data_pending(client) && (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
 	retval = wait_event_interruptible(joydev->wait,
-					  !joydev->exist ||
-					  client->startup < joydev->nabs + joydev->nkey ||
-					  client->head != client->tail);
+			!joydev->exist || joydev_data_pending(client));
 	if (retval)
 		return retval;
 
 	if (!joydev->exist)
 		return -ENODEV;
 
-	while (client->startup < joydev->nabs + joydev->nkey && retval + sizeof(struct js_event) <= count) {
-
-		struct js_event event;
-
-		event.time = jiffies_to_msecs(jiffies);
-
-		if (client->startup < joydev->nkey) {
-			event.type = JS_EVENT_BUTTON | JS_EVENT_INIT;
-			event.number = client->startup;
-			event.value = !!test_bit(joydev->keypam[event.number], input->key);
-		} else {
-			event.type = JS_EVENT_AXIS | JS_EVENT_INIT;
-			event.number = client->startup - joydev->nkey;
-			event.value = joydev->abs[event.number];
-		}
+	while (retval + sizeof(struct js_event) <= count &&
+	       joydev_generate_startup_event(client, input, &event)) {
 
 		if (copy_to_user(buf + retval, &event, sizeof(struct js_event)))
 			return -EFAULT;
 
-		client->startup++;
 		retval += sizeof(struct js_event);
 	}
 
-	while (client->head != client->tail && retval + sizeof(struct js_event) <= count) {
+	while (retval + sizeof(struct js_event) <= count &&
+	       joydev_fetch_next_event(client, &event)) {
 
-		if (copy_to_user(buf + retval, client->buffer + client->tail, sizeof(struct js_event)))
+		if (copy_to_user(buf + retval, &event, sizeof(struct js_event)))
 			return -EFAULT;
 
-		client->tail = (client->tail + 1) & (JOYDEV_BUFFER_SIZE - 1);
 		retval += sizeof(struct js_event);
 	}
 
@@ -301,126 +452,144 @@
 	struct joydev *joydev = client->joydev;
 
 	poll_wait(file, &joydev->wait, wait);
-	return ((client->head != client->tail || client->startup < joydev->nabs + joydev->nkey) ?
-		(POLLIN | POLLRDNORM) : 0) | (joydev->exist ? 0 : (POLLHUP | POLLERR));
+	return (joydev_data_pending(client) ? (POLLIN | POLLRDNORM) : 0) |
+		(joydev->exist ?  0 : (POLLHUP | POLLERR));
 }
 
-static int joydev_ioctl_common(struct joydev *joydev, unsigned int cmd, void __user *argp)
+static int joydev_ioctl_common(struct joydev *joydev,
+				unsigned int cmd, void __user *argp)
 {
 	struct input_dev *dev = joydev->handle.dev;
 	int i, j;
 
 	switch (cmd) {
 
-		case JS_SET_CAL:
-			return copy_from_user(&joydev->glue.JS_CORR, argp,
+	case JS_SET_CAL:
+		return copy_from_user(&joydev->glue.JS_CORR, argp,
 				sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
 
-		case JS_GET_CAL:
-			return copy_to_user(argp, &joydev->glue.JS_CORR,
+	case JS_GET_CAL:
+		return copy_to_user(argp, &joydev->glue.JS_CORR,
 				sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
 
-		case JS_SET_TIMEOUT:
-			return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
+	case JS_SET_TIMEOUT:
+		return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
 
-		case JS_GET_TIMEOUT:
-			return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
+	case JS_GET_TIMEOUT:
+		return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
 
-		case JSIOCGVERSION:
-			return put_user(JS_VERSION, (__u32 __user *) argp);
+	case JSIOCGVERSION:
+		return put_user(JS_VERSION, (__u32 __user *) argp);
 
-		case JSIOCGAXES:
-			return put_user(joydev->nabs, (__u8 __user *) argp);
+	case JSIOCGAXES:
+		return put_user(joydev->nabs, (__u8 __user *) argp);
 
-		case JSIOCGBUTTONS:
-			return put_user(joydev->nkey, (__u8 __user *) argp);
+	case JSIOCGBUTTONS:
+		return put_user(joydev->nkey, (__u8 __user *) argp);
 
-		case JSIOCSCORR:
-			if (copy_from_user(joydev->corr, argp,
-				      sizeof(joydev->corr[0]) * joydev->nabs))
-			    return -EFAULT;
-			for (i = 0; i < joydev->nabs; i++) {
-				j = joydev->abspam[i];
-			        joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i);
-			}
-			return 0;
+	case JSIOCSCORR:
+		if (copy_from_user(joydev->corr, argp,
+			      sizeof(joydev->corr[0]) * joydev->nabs))
+		    return -EFAULT;
 
-		case JSIOCGCORR:
-			return copy_to_user(argp, joydev->corr,
-						sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0;
+		for (i = 0; i < joydev->nabs; i++) {
+			j = joydev->abspam[i];
+			joydev->abs[i] = joydev_correct(dev->abs[j],
+							&joydev->corr[i]);
+		}
+		return 0;
 
-		case JSIOCSAXMAP:
-			if (copy_from_user(joydev->abspam, argp, sizeof(__u8) * (ABS_MAX + 1)))
+	case JSIOCGCORR:
+		return copy_to_user(argp, joydev->corr,
+			sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0;
+
+	case JSIOCSAXMAP:
+		if (copy_from_user(joydev->abspam, argp,
+				   sizeof(__u8) * (ABS_MAX + 1)))
+			return -EFAULT;
+
+		for (i = 0; i < joydev->nabs; i++) {
+			if (joydev->abspam[i] > ABS_MAX)
+				return -EINVAL;
+			joydev->absmap[joydev->abspam[i]] = i;
+		}
+		return 0;
+
+	case JSIOCGAXMAP:
+		return copy_to_user(argp, joydev->abspam,
+			sizeof(__u8) * (ABS_MAX + 1)) ? -EFAULT : 0;
+
+	case JSIOCSBTNMAP:
+		if (copy_from_user(joydev->keypam, argp,
+				   sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)))
+			return -EFAULT;
+
+		for (i = 0; i < joydev->nkey; i++) {
+			if (joydev->keypam[i] > KEY_MAX ||
+			    joydev->keypam[i] < BTN_MISC)
+				return -EINVAL;
+			joydev->keymap[joydev->keypam[i] - BTN_MISC] = i;
+		}
+
+		return 0;
+
+	case JSIOCGBTNMAP:
+		return copy_to_user(argp, joydev->keypam,
+			sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0;
+
+	default:
+		if ((cmd & ~IOCSIZE_MASK) == JSIOCGNAME(0)) {
+			int len;
+			if (!dev->name)
+				return 0;
+			len = strlen(dev->name) + 1;
+			if (len > _IOC_SIZE(cmd))
+				len = _IOC_SIZE(cmd);
+			if (copy_to_user(argp, dev->name, len))
 				return -EFAULT;
-			for (i = 0; i < joydev->nabs; i++) {
-				if (joydev->abspam[i] > ABS_MAX)
-					return -EINVAL;
-				joydev->absmap[joydev->abspam[i]] = i;
-			}
-			return 0;
-
-		case JSIOCGAXMAP:
-			return copy_to_user(argp, joydev->abspam,
-						sizeof(__u8) * (ABS_MAX + 1)) ? -EFAULT : 0;
-
-		case JSIOCSBTNMAP:
-			if (copy_from_user(joydev->keypam, argp, sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)))
-				return -EFAULT;
-			for (i = 0; i < joydev->nkey; i++) {
-				if (joydev->keypam[i] > KEY_MAX || joydev->keypam[i] < BTN_MISC)
-					return -EINVAL;
-				joydev->keymap[joydev->keypam[i] - BTN_MISC] = i;
-			}
-			return 0;
-
-		case JSIOCGBTNMAP:
-			return copy_to_user(argp, joydev->keypam,
-						sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0;
-
-		default:
-			if ((cmd & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) == JSIOCGNAME(0)) {
-				int len;
-				if (!dev->name)
-					return 0;
-				len = strlen(dev->name) + 1;
-				if (len > _IOC_SIZE(cmd))
-					len = _IOC_SIZE(cmd);
-				if (copy_to_user(argp, dev->name, len))
-					return -EFAULT;
-				return len;
-			}
+			return len;
+		}
 	}
 	return -EINVAL;
 }
 
 #ifdef CONFIG_COMPAT
-static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long joydev_compat_ioctl(struct file *file,
+				unsigned int cmd, unsigned long arg)
 {
 	struct joydev_client *client = file->private_data;
 	struct joydev *joydev = client->joydev;
 	void __user *argp = (void __user *)arg;
 	s32 tmp32;
 	struct JS_DATA_SAVE_TYPE_32 ds32;
-	int err;
+	int retval;
 
-	if (!joydev->exist)
-		return -ENODEV;
+	retval = mutex_lock_interruptible(&joydev->mutex);
+	if (retval)
+		return retval;
 
-	switch(cmd) {
+	if (!joydev->exist) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	switch (cmd) {
+
 	case JS_SET_TIMELIMIT:
-		err = get_user(tmp32, (s32 __user *) arg);
-		if (err == 0)
+		retval = get_user(tmp32, (s32 __user *) arg);
+		if (retval == 0)
 			joydev->glue.JS_TIMELIMIT = tmp32;
 		break;
+
 	case JS_GET_TIMELIMIT:
 		tmp32 = joydev->glue.JS_TIMELIMIT;
-		err = put_user(tmp32, (s32 __user *) arg);
+		retval = put_user(tmp32, (s32 __user *) arg);
 		break;
 
 	case JS_SET_ALL:
-		err = copy_from_user(&ds32, argp,
-				     sizeof(ds32)) ? -EFAULT : 0;
-		if (err == 0) {
+		retval = copy_from_user(&ds32, argp,
+					sizeof(ds32)) ? -EFAULT : 0;
+		if (retval == 0) {
 			joydev->glue.JS_TIMEOUT    = ds32.JS_TIMEOUT;
 			joydev->glue.BUSY          = ds32.BUSY;
 			joydev->glue.JS_EXPIRETIME = ds32.JS_EXPIRETIME;
@@ -438,55 +607,119 @@
 		ds32.JS_SAVE       = joydev->glue.JS_SAVE;
 		ds32.JS_CORR       = joydev->glue.JS_CORR;
 
-		err = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0;
+		retval = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0;
 		break;
 
 	default:
-		err = joydev_ioctl_common(joydev, cmd, argp);
+		retval = joydev_ioctl_common(joydev, cmd, argp);
+		break;
 	}
-	return err;
+
+ out:
+	mutex_unlock(&joydev->mutex);
+	return retval;
 }
 #endif /* CONFIG_COMPAT */
 
-static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long joydev_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
 {
 	struct joydev_client *client = file->private_data;
 	struct joydev *joydev = client->joydev;
 	void __user *argp = (void __user *)arg;
+	int retval;
 
-	if (!joydev->exist)
-		return -ENODEV;
+	retval = mutex_lock_interruptible(&joydev->mutex);
+	if (retval)
+		return retval;
 
-	switch(cmd) {
-		case JS_SET_TIMELIMIT:
-			return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
-		case JS_GET_TIMELIMIT:
-			return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
-		case JS_SET_ALL:
-			return copy_from_user(&joydev->glue, argp,
-						sizeof(joydev->glue)) ? -EFAULT : 0;
-		case JS_GET_ALL:
-			return copy_to_user(argp, &joydev->glue,
-						sizeof(joydev->glue)) ? -EFAULT : 0;
-		default:
-			return joydev_ioctl_common(joydev, cmd, argp);
+	if (!joydev->exist) {
+		retval = -ENODEV;
+		goto out;
 	}
+
+	switch (cmd) {
+
+	case JS_SET_TIMELIMIT:
+		retval = get_user(joydev->glue.JS_TIMELIMIT,
+				  (long __user *) arg);
+		break;
+
+	case JS_GET_TIMELIMIT:
+		retval = put_user(joydev->glue.JS_TIMELIMIT,
+				  (long __user *) arg);
+		break;
+
+	case JS_SET_ALL:
+		retval = copy_from_user(&joydev->glue, argp,
+					sizeof(joydev->glue)) ? -EFAULT: 0;
+		break;
+
+	case JS_GET_ALL:
+		retval = copy_to_user(argp, &joydev->glue,
+				      sizeof(joydev->glue)) ? -EFAULT : 0;
+		break;
+
+	default:
+		retval = joydev_ioctl_common(joydev, cmd, argp);
+		break;
+	}
+ out:
+	mutex_unlock(&joydev->mutex);
+	return retval;
 }
 
 static const struct file_operations joydev_fops = {
-	.owner =	THIS_MODULE,
-	.read =		joydev_read,
-	.write =	joydev_write,
-	.poll =		joydev_poll,
-	.open =		joydev_open,
-	.release =	joydev_release,
-	.ioctl =	joydev_ioctl,
+	.owner		= THIS_MODULE,
+	.read		= joydev_read,
+	.poll		= joydev_poll,
+	.open		= joydev_open,
+	.release	= joydev_release,
+	.unlocked_ioctl	= joydev_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl =	joydev_compat_ioctl,
+	.compat_ioctl	= joydev_compat_ioctl,
 #endif
-	.fasync =	joydev_fasync,
+	.fasync		= joydev_fasync,
 };
 
+static int joydev_install_chrdev(struct joydev *joydev)
+{
+	joydev_table[joydev->minor] = joydev;
+	return 0;
+}
+
+static void joydev_remove_chrdev(struct joydev *joydev)
+{
+	mutex_lock(&joydev_table_mutex);
+	joydev_table[joydev->minor] = NULL;
+	mutex_unlock(&joydev_table_mutex);
+}
+
+/*
+ * Mark device non-existant. This disables writes, ioctls and
+ * prevents new users from opening the device. Already posted
+ * blocking reads will stay, however new ones will fail.
+ */
+static void joydev_mark_dead(struct joydev *joydev)
+{
+	mutex_lock(&joydev->mutex);
+	joydev->exist = 0;
+	mutex_unlock(&joydev->mutex);
+}
+
+static void joydev_cleanup(struct joydev *joydev)
+{
+	struct input_handle *handle = &joydev->handle;
+
+	joydev_mark_dead(joydev);
+	joydev_hangup(joydev);
+	joydev_remove_chrdev(joydev);
+
+	/* joydev is marked dead so noone else accesses joydev->open */
+	if (joydev->open)
+		input_close_device(handle);
+}
+
 static int joydev_connect(struct input_handler *handler, struct input_dev *dev,
 			  const struct input_device_id *id)
 {
@@ -494,7 +727,10 @@
 	int i, j, t, minor;
 	int error;
 
-	for (minor = 0; minor < JOYDEV_MINORS && joydev_table[minor]; minor++);
+	for (minor = 0; minor < JOYDEV_MINORS; minor++)
+		if (!joydev_table[minor])
+			break;
+
 	if (minor == JOYDEV_MINORS) {
 		printk(KERN_ERR "joydev: no more free joydev devices\n");
 		return -ENFILE;
@@ -505,15 +741,19 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&joydev->client_list);
+	spin_lock_init(&joydev->client_lock);
+	mutex_init(&joydev->mutex);
 	init_waitqueue_head(&joydev->wait);
 
+	snprintf(joydev->name, sizeof(joydev->name), "js%d", minor);
+	joydev->exist = 1;
 	joydev->minor = minor;
+
 	joydev->exist = 1;
 	joydev->handle.dev = dev;
 	joydev->handle.name = joydev->name;
 	joydev->handle.handler = handler;
 	joydev->handle.private = joydev;
-	snprintf(joydev->name, sizeof(joydev->name), "js%d", minor);
 
 	for (i = 0; i < ABS_MAX + 1; i++)
 		if (test_bit(i, dev->absbit)) {
@@ -545,67 +785,65 @@
 		}
 		joydev->corr[i].type = JS_CORR_BROKEN;
 		joydev->corr[i].prec = dev->absfuzz[j];
-		joydev->corr[i].coef[0] = (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j];
-		joydev->corr[i].coef[1] = (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j];
-		if (!(t = ((dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j])))
-			continue;
-		joydev->corr[i].coef[2] = (1 << 29) / t;
-		joydev->corr[i].coef[3] = (1 << 29) / t;
+		joydev->corr[i].coef[0] =
+			(dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j];
+		joydev->corr[i].coef[1] =
+			(dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j];
 
-		joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i);
+		t = (dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j];
+		if (t) {
+			joydev->corr[i].coef[2] = (1 << 29) / t;
+			joydev->corr[i].coef[3] = (1 << 29) / t;
+
+			joydev->abs[i] = joydev_correct(dev->abs[j],
+							joydev->corr + i);
+		}
 	}
 
-	snprintf(joydev->dev.bus_id, sizeof(joydev->dev.bus_id),
-		 "js%d", minor);
+	strlcpy(joydev->dev.bus_id, joydev->name, sizeof(joydev->dev.bus_id));
+	joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor);
 	joydev->dev.class = &input_class;
 	joydev->dev.parent = &dev->dev;
-	joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor);
 	joydev->dev.release = joydev_free;
 	device_initialize(&joydev->dev);
 
-	joydev_table[minor] = joydev;
-
-	error = device_add(&joydev->dev);
+	error = input_register_handle(&joydev->handle);
 	if (error)
 		goto err_free_joydev;
 
-	error = input_register_handle(&joydev->handle);
+	error = joydev_install_chrdev(joydev);
 	if (error)
-		goto err_delete_joydev;
+		goto err_unregister_handle;
+
+	error = device_add(&joydev->dev);
+	if (error)
+		goto err_cleanup_joydev;
 
 	return 0;
 
- err_delete_joydev:
-	device_del(&joydev->dev);
+ err_cleanup_joydev:
+	joydev_cleanup(joydev);
+ err_unregister_handle:
+	input_unregister_handle(&joydev->handle);
  err_free_joydev:
 	put_device(&joydev->dev);
 	return error;
 }
 
-
 static void joydev_disconnect(struct input_handle *handle)
 {
 	struct joydev *joydev = handle->private;
-	struct joydev_client *client;
 
-	input_unregister_handle(handle);
 	device_del(&joydev->dev);
-
-	joydev->exist = 0;
-
-	if (joydev->open) {
-		input_close_device(handle);
-		list_for_each_entry(client, &joydev->client_list, node)
-			kill_fasync(&client->fasync, SIGIO, POLL_HUP);
-		wake_up_interruptible(&joydev->wait);
-	}
-
+	joydev_cleanup(joydev);
+	input_unregister_handle(handle);
 	put_device(&joydev->dev);
 }
 
 static const struct input_device_id joydev_blacklist[] = {
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_KEYBIT,
 		.evbit = { BIT(EV_KEY) },
 		.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
 	},	/* Avoid itouchpads, touchscreens and tablets */
@@ -614,17 +852,20 @@
 
 static const struct input_device_id joydev_ids[] = {
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_ABSBIT,
 		.evbit = { BIT(EV_ABS) },
 		.absbit = { BIT(ABS_X) },
 	},
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_ABSBIT,
 		.evbit = { BIT(EV_ABS) },
 		.absbit = { BIT(ABS_WHEEL) },
 	},
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_ABSBIT,
 		.evbit = { BIT(EV_ABS) },
 		.absbit = { BIT(ABS_THROTTLE) },
 	},
@@ -634,14 +875,14 @@
 MODULE_DEVICE_TABLE(input, joydev_ids);
 
 static struct input_handler joydev_handler = {
-	.event =	joydev_event,
-	.connect =	joydev_connect,
-	.disconnect =	joydev_disconnect,
-	.fops =		&joydev_fops,
-	.minor =	JOYDEV_MINOR_BASE,
-	.name =		"joydev",
-	.id_table =	joydev_ids,
-	.blacklist =	joydev_blacklist,
+	.event		= joydev_event,
+	.connect	= joydev_connect,
+	.disconnect	= joydev_disconnect,
+	.fops		= &joydev_fops,
+	.minor		= JOYDEV_MINOR_BASE,
+	.name		= "joydev",
+	.id_table	= joydev_ids,
+	.blacklist	= joydev_blacklist,
 };
 
 static int __init joydev_init(void)
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 2808039..623629a 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -223,12 +223,16 @@
 	struct input_dev *dev = xpad->dev;
 
 	/* left stick */
-	input_report_abs(dev, ABS_X, (__s16) (((__s16)data[13] << 8) | data[12]));
-	input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[15] << 8) | data[14]));
+	input_report_abs(dev, ABS_X,
+			 (__s16) le16_to_cpup((__le16 *)(data + 12)));
+	input_report_abs(dev, ABS_Y,
+			 (__s16) le16_to_cpup((__le16 *)(data + 14)));
 
 	/* right stick */
-	input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[17] << 8) | data[16]));
-	input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[19] << 8) | data[18]));
+	input_report_abs(dev, ABS_RX,
+			 (__s16) le16_to_cpup((__le16 *)(data + 16)));
+	input_report_abs(dev, ABS_RY,
+			 (__s16) le16_to_cpup((__le16 *)(data + 18)));
 
 	/* triggers left/right */
 	input_report_abs(dev, ABS_Z, data[10]);
@@ -236,8 +240,10 @@
 
 	/* digital pad */
 	if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) {
-		input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04));
-		input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01));
+		input_report_abs(dev, ABS_HAT0X,
+				 !!(data[2] & 0x08) - !!(data[2] & 0x04));
+		input_report_abs(dev, ABS_HAT0Y,
+				 !!(data[2] & 0x02) - !!(data[2] & 0x01));
 	} else /* xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS */ {
 		input_report_key(dev, BTN_LEFT,  data[2] & 0x04);
 		input_report_key(dev, BTN_RIGHT, data[2] & 0x08);
@@ -274,14 +280,17 @@
  *		http://www.free60.org/wiki/Gamepad
  */
 
-static void xpad360_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
+static void xpad360_process_packet(struct usb_xpad *xpad,
+				   u16 cmd, unsigned char *data)
 {
 	struct input_dev *dev = xpad->dev;
 
 	/* digital pad */
 	if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) {
-		input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04));
-		input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01));
+		input_report_abs(dev, ABS_HAT0X,
+				 !!(data[2] & 0x08) - !!(data[2] & 0x04));
+		input_report_abs(dev, ABS_HAT0Y,
+				 !!(data[2] & 0x02) - !!(data[2] & 0x01));
 	} else if (xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS) {
 		/* dpad as buttons (right, left, down, up) */
 		input_report_key(dev, BTN_LEFT, data[2] & 0x04);
@@ -308,12 +317,16 @@
 	input_report_key(dev, BTN_MODE,	data[3] & 0x04);
 
 	/* left stick */
-	input_report_abs(dev, ABS_X, (__s16) (((__s16)data[7] << 8) | (__s16)data[6]));
-	input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[9] << 8) | (__s16)data[8]));
+	input_report_abs(dev, ABS_X,
+			 (__s16) le16_to_cpup((__le16 *)(data + 6)));
+	input_report_abs(dev, ABS_Y,
+			 (__s16) le16_to_cpup((__le16 *)(data + 8)));
 
 	/* right stick */
-	input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[11] << 8) | (__s16)data[10]));
-	input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[13] << 8) | (__s16)data[12]));
+	input_report_abs(dev, ABS_RX,
+			 (__s16) le16_to_cpup((__le16 *)(data + 10)));
+	input_report_abs(dev, ABS_RY,
+			 (__s16) le16_to_cpup((__le16 *)(data + 12)));
 
 	/* triggers left/right */
 	input_report_abs(dev, ABS_Z, data[4]);
@@ -335,10 +348,12 @@
 	case -ENOENT:
 	case -ESHUTDOWN:
 		/* this urb is terminated, clean up */
-		dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status);
+		dbg("%s - urb shutting down with status: %d",
+			__FUNCTION__, urb->status);
 		return;
 	default:
-		dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status);
+		dbg("%s - nonzero urb status received: %d",
+			__FUNCTION__, urb->status);
 		goto exit;
 	}
 
@@ -367,10 +382,12 @@
 		case -ENOENT:
 		case -ESHUTDOWN:
 			/* this urb is terminated, clean up */
-			dbg("%s - urb shutting down with status: %d",  __FUNCTION__, urb->status);
+			dbg("%s - urb shutting down with status: %d",
+				__FUNCTION__, urb->status);
 			return;
 		default:
-			dbg("%s - nonzero urb status received: %d",  __FUNCTION__, urb->status);
+			dbg("%s - nonzero urb status received: %d",
+				__FUNCTION__, urb->status);
 			goto exit;
 	}
 
@@ -378,7 +395,7 @@
 	retval = usb_submit_urb(urb, GFP_ATOMIC);
 	if (retval)
 		err("%s - usb_submit_urb failed with result %d",
-		   __FUNCTION__, retval);
+		    __FUNCTION__, retval);
 }
 
 static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
@@ -595,7 +612,7 @@
 
 static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
-	struct usb_device *udev = interface_to_usbdev (intf);
+	struct usb_device *udev = interface_to_usbdev(intf);
 	struct usb_xpad *xpad;
 	struct input_dev *input_dev;
 	struct usb_endpoint_descriptor *ep_irq_in;
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index c97d5eb..2316a01 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -208,6 +208,27 @@
 	  This driver implements support for HIL-keyboards attached
 	  to your machine, so normally you should say Y here.
 
+config KEYBOARD_HP6XX
+	tristate "HP Jornada 6XX Keyboard support"
+	depends on SH_HP6XX
+	select INPUT_POLLDEV
+	help
+	  This adds support for the onboard keyboard found on
+	  HP Jornada 620/660/680/690.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called jornada680_kbd.
+
+config KEYBOARD_HP7XX
+	tristate "HP Jornada 7XX Keyboard Driver"
+	depends on SA1100_JORNADA720_SSP && SA1100_SSP
+	help
+	  Say Y here to add support for the HP Jornada 7xx (710/720/728)
+	  onboard keyboard.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called jornada720_kbd.
+
 config KEYBOARD_OMAP
 	tristate "TI OMAP keypad support"
 	depends on (ARCH_OMAP1 || ARCH_OMAP2)
@@ -253,4 +274,23 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called gpio-keys.
 
+config KEYBOARD_MAPLE
+	tristate "Maple bus keyboard"
+	depends on SH_DREAMCAST && MAPLE
+	help
+	  Say Y here if you have a Dreamcast console running Linux and have
+	  a keyboard attached to its Maple bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called maple_keyb.
+
+config KEYBOARD_BFIN
+	tristate "Blackfin BF54x keypad support"
+	depends on BF54x
+	help
+	  Say Y here if you want to use the BF54x keypad.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called bf54x-keys.
+
 endif
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 28d211b..e97455f 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -21,4 +21,7 @@
 obj-$(CONFIG_KEYBOARD_PXA27x)		+= pxa27x_keyboard.o
 obj-$(CONFIG_KEYBOARD_AAED2000)		+= aaed2000_kbd.o
 obj-$(CONFIG_KEYBOARD_GPIO)		+= gpio_keys.o
-
+obj-$(CONFIG_KEYBOARD_HP6XX)		+= jornada680_kbd.o
+obj-$(CONFIG_KEYBOARD_HP7XX)		+= jornada720_kbd.o
+obj-$(CONFIG_KEYBOARD_MAPLE)		+= maple_keyb.o
+obj-$(CONFIG_KEYBOARD_BFIN)		+= bf54x-keys.o
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index f948d3a..a180015 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -217,7 +217,7 @@
 
 static int __init atakbd_init(void)
 {
-	int i;
+	int i, error;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP))
 		return -EIO;
@@ -247,9 +247,10 @@
 	}
 
 	/* error check */
-	if (input_register_device(atakbd_dev)) {
+	error = input_register_device(atakbd_dev);
+	if (error) {
 		input_free_device(atakbd_dev);
-		return -ENOMEM;
+		return error;
 	}
 
 	atari_input_keyboard_interrupt_hook = atakbd_interrupt;
diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c
new file mode 100644
index 0000000..a67b29b
--- /dev/null
+++ b/drivers/input/keyboard/bf54x-keys.c
@@ -0,0 +1,382 @@
+/*
+ * File:         drivers/input/keyboard/bf54x-keys.c
+ * Based on:
+ * Author:       Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * Created:
+ * Description:  keypad driver for Analog Devices Blackfin BF54x Processors
+ *
+ *
+ * Modified:
+ *               Copyright 2007 Analog Devices Inc.
+ *
+ * Bugs:         Enter bugs at http://blackfin.uclinux.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/pm.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/input.h>
+#include <linux/irq.h>
+
+#include <asm/portmux.h>
+#include <asm/mach/bf54x_keys.h>
+
+#define DRV_NAME	"bf54x-keys"
+#define TIME_SCALE	100	/* 100 ns */
+#define	MAX_MULT	(0xFF * TIME_SCALE)
+#define MAX_RC		8	/* Max Row/Col */
+
+static const u16 per_rows[] = {
+	P_KEY_ROW7,
+	P_KEY_ROW6,
+	P_KEY_ROW5,
+	P_KEY_ROW4,
+	P_KEY_ROW3,
+	P_KEY_ROW2,
+	P_KEY_ROW1,
+	P_KEY_ROW0,
+	0
+};
+
+static const u16 per_cols[] = {
+	P_KEY_COL7,
+	P_KEY_COL6,
+	P_KEY_COL5,
+	P_KEY_COL4,
+	P_KEY_COL3,
+	P_KEY_COL2,
+	P_KEY_COL1,
+	P_KEY_COL0,
+	0
+};
+
+struct bf54x_kpad {
+	struct input_dev *input;
+	int irq;
+	unsigned short lastkey;
+	unsigned short *keycode;
+	struct timer_list timer;
+	unsigned int keyup_test_jiffies;
+};
+
+static inline int bfin_kpad_find_key(struct bf54x_kpad *bf54x_kpad,
+			struct input_dev *input, u16 keyident)
+{
+	u16 i;
+
+	for (i = 0; i < input->keycodemax; i++)
+		if (bf54x_kpad->keycode[i + input->keycodemax] == keyident)
+			return bf54x_kpad->keycode[i];
+	return -1;
+}
+
+static inline void bfin_keycodecpy(unsigned short *keycode,
+			const unsigned int *pdata_kc,
+			unsigned short keymapsize)
+{
+	unsigned int i;
+
+	for (i = 0; i < keymapsize; i++) {
+		keycode[i] = pdata_kc[i] & 0xffff;
+		keycode[i + keymapsize] = pdata_kc[i] >> 16;
+	}
+}
+
+static inline u16 bfin_kpad_get_prescale(u32 timescale)
+{
+	u32 sclk = get_sclk();
+
+	return ((((sclk / 1000) * timescale) / 1024) - 1);
+}
+
+static inline u16 bfin_kpad_get_keypressed(struct bf54x_kpad *bf54x_kpad)
+{
+	return (bfin_read_KPAD_STAT() & KPAD_PRESSED);
+}
+
+static inline void bfin_kpad_clear_irq(void)
+{
+	bfin_write_KPAD_STAT(0xFFFF);
+	bfin_write_KPAD_ROWCOL(0xFFFF);
+}
+
+static void bfin_kpad_timer(unsigned long data)
+{
+	struct platform_device *pdev =  (struct platform_device *) data;
+	struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
+
+	if (bfin_kpad_get_keypressed(bf54x_kpad)) {
+		/* Try again later */
+		mod_timer(&bf54x_kpad->timer,
+			  jiffies + bf54x_kpad->keyup_test_jiffies);
+		return;
+	}
+
+	input_report_key(bf54x_kpad->input, bf54x_kpad->lastkey, 0);
+	input_sync(bf54x_kpad->input);
+
+	/* Clear IRQ Status */
+
+	bfin_kpad_clear_irq();
+	enable_irq(bf54x_kpad->irq);
+}
+
+static irqreturn_t bfin_kpad_isr(int irq, void *dev_id)
+{
+	struct platform_device *pdev = dev_id;
+	struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
+	struct input_dev *input = bf54x_kpad->input;
+	int key;
+	u16 rowcol = bfin_read_KPAD_ROWCOL();
+
+	key = bfin_kpad_find_key(bf54x_kpad, input, rowcol);
+
+	input_report_key(input, key, 1);
+	input_sync(input);
+
+	if (bfin_kpad_get_keypressed(bf54x_kpad)) {
+		disable_irq(bf54x_kpad->irq);
+		bf54x_kpad->lastkey = key;
+		mod_timer(&bf54x_kpad->timer,
+			  jiffies + bf54x_kpad->keyup_test_jiffies);
+	} else {
+		input_report_key(input, key, 0);
+		input_sync(input);
+
+		bfin_kpad_clear_irq();
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int __devinit bfin_kpad_probe(struct platform_device *pdev)
+{
+	struct bf54x_kpad *bf54x_kpad;
+	struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data;
+	struct input_dev *input;
+	int i, error;
+
+	if (!pdata->rows || !pdata->cols || !pdata->keymap) {
+		printk(KERN_ERR DRV_NAME
+			": No rows, cols or keymap from pdata\n");
+		return -EINVAL;
+	}
+
+	if (!pdata->keymapsize ||
+	    pdata->keymapsize > (pdata->rows * pdata->cols)) {
+		printk(KERN_ERR DRV_NAME ": Invalid keymapsize\n");
+		return -EINVAL;
+	}
+
+	bf54x_kpad = kzalloc(sizeof(struct bf54x_kpad), GFP_KERNEL);
+	if (!bf54x_kpad)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, bf54x_kpad);
+
+	/* Allocate memory for keymap followed by private LUT */
+	bf54x_kpad->keycode = kmalloc(pdata->keymapsize *
+					sizeof(unsigned short) * 2, GFP_KERNEL);
+	if (!bf54x_kpad->keycode) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	if (!pdata->debounce_time || !pdata->debounce_time > MAX_MULT ||
+	    !pdata->coldrive_time || !pdata->coldrive_time > MAX_MULT) {
+		printk(KERN_ERR DRV_NAME
+			": Invalid Debounce/Columdrive Time from pdata\n");
+		bfin_write_KPAD_MSEL(0xFF0);	/* Default MSEL	*/
+	} else {
+		bfin_write_KPAD_MSEL(
+			((pdata->debounce_time / TIME_SCALE)
+						& DBON_SCALE) |
+			(((pdata->coldrive_time / TIME_SCALE) << 8)
+						& COLDRV_SCALE));
+
+	}
+
+	if (!pdata->keyup_test_interval)
+		bf54x_kpad->keyup_test_jiffies = msecs_to_jiffies(50);
+	else
+		bf54x_kpad->keyup_test_jiffies =
+			msecs_to_jiffies(pdata->keyup_test_interval);
+
+	if (peripheral_request_list((u16 *)&per_rows[MAX_RC - pdata->rows],
+				    DRV_NAME)) {
+		printk(KERN_ERR DRV_NAME
+			": Requesting Peripherals failed\n");
+		error = -EFAULT;
+		goto out0;
+	}
+
+	if (peripheral_request_list((u16 *)&per_cols[MAX_RC - pdata->cols],
+				    DRV_NAME)) {
+		printk(KERN_ERR DRV_NAME
+			": Requesting Peripherals failed\n");
+		error = -EFAULT;
+		goto out1;
+	}
+
+	bf54x_kpad->irq = platform_get_irq(pdev, 0);
+	if (bf54x_kpad->irq < 0) {
+		error = -ENODEV;
+		goto out2;
+	}
+
+	error = request_irq(bf54x_kpad->irq, bfin_kpad_isr,
+				 IRQF_SAMPLE_RANDOM, DRV_NAME, pdev);
+	if (error) {
+		printk(KERN_ERR DRV_NAME
+			": unable to claim irq %d; error %d\n",
+			bf54x_kpad->irq, error);
+		error = -EBUSY;
+		goto out2;
+	}
+
+	input = input_allocate_device();
+	if (!input) {
+		error = -ENOMEM;
+		goto out3;
+	}
+
+	bf54x_kpad->input = input;
+
+	input->name = pdev->name;
+	input->phys = "bf54x-keys/input0";
+	input->dev.parent = &pdev->dev;
+
+	input_set_drvdata(input, bf54x_kpad);
+
+	input->id.bustype = BUS_HOST;
+	input->id.vendor = 0x0001;
+	input->id.product = 0x0001;
+	input->id.version = 0x0100;
+
+	input->keycodesize = sizeof(unsigned short);
+	input->keycodemax = pdata->keymapsize;
+	input->keycode = bf54x_kpad->keycode;
+
+	bfin_keycodecpy(bf54x_kpad->keycode, pdata->keymap, pdata->keymapsize);
+
+	/* setup input device */
+	__set_bit(EV_KEY, input->evbit);
+
+	if (pdata->repeat)
+		__set_bit(EV_REP, input->evbit);
+
+	for (i = 0; i < input->keycodemax; i++)
+		__set_bit(bf54x_kpad->keycode[i] & KEY_MAX, input->keybit);
+	__clear_bit(KEY_RESERVED, input->keybit);
+
+	error = input_register_device(input);
+	if (error) {
+		printk(KERN_ERR DRV_NAME
+			": Unable to register input device (%d)\n", error);
+		goto out4;
+	}
+
+	/* Init Keypad Key Up/Release test timer */
+
+	setup_timer(&bf54x_kpad->timer, bfin_kpad_timer, (unsigned long) pdev);
+
+	bfin_write_KPAD_PRESCALE(bfin_kpad_get_prescale(TIME_SCALE));
+
+	bfin_write_KPAD_CTL((((pdata->cols - 1) << 13) & KPAD_COLEN) |
+				(((pdata->rows - 1) << 10) & KPAD_ROWEN) |
+				(2 & KPAD_IRQMODE));
+
+	bfin_write_KPAD_CTL(bfin_read_KPAD_CTL() | KPAD_EN);
+
+	printk(KERN_ERR DRV_NAME
+		": Blackfin BF54x Keypad registered IRQ %d\n", bf54x_kpad->irq);
+
+	return 0;
+
+out4:
+	input_free_device(input);
+out3:
+	free_irq(bf54x_kpad->irq, pdev);
+out2:
+	peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]);
+out1:
+	peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]);
+out0:
+	kfree(bf54x_kpad->keycode);
+out:
+	kfree(bf54x_kpad);
+	platform_set_drvdata(pdev, NULL);
+
+	return error;
+}
+
+static int __devexit bfin_kpad_remove(struct platform_device *pdev)
+{
+	struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data;
+	struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
+
+	del_timer_sync(&bf54x_kpad->timer);
+	free_irq(bf54x_kpad->irq, pdev);
+
+	input_unregister_device(bf54x_kpad->input);
+
+	peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]);
+	peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]);
+
+	kfree(bf54x_kpad->keycode);
+	kfree(bf54x_kpad);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+struct platform_driver bfin_kpad_device_driver = {
+	.probe		= bfin_kpad_probe,
+	.remove		= __devexit_p(bfin_kpad_remove),
+	.driver		= {
+		.name	= DRV_NAME,
+	}
+};
+
+static int __init bfin_kpad_init(void)
+{
+	return platform_driver_register(&bfin_kpad_device_driver);
+}
+
+static void __exit bfin_kpad_exit(void)
+{
+	platform_driver_unregister(&bfin_kpad_device_driver);
+}
+
+module_init(bfin_kpad_init);
+module_exit(bfin_kpad_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Keypad driver for BF54x Processors");
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index f0b22b8..e2a3293 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -54,6 +54,7 @@
 	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
 	struct input_dev *input;
 	int i, error;
+	int wakeup = 0;
 
 	input = input_allocate_device();
 	if (!input)
@@ -77,31 +78,51 @@
 		int irq = gpio_to_irq(button->gpio);
 		unsigned int type = button->type ?: EV_KEY;
 
-		set_irq_type(irq, IRQ_TYPE_EDGE_BOTH);
-		error = request_irq(irq, gpio_keys_isr, IRQF_SAMPLE_RANDOM,
-				     button->desc ? button->desc : "gpio_keys",
-				     pdev);
+		if (irq < 0) {
+			error = irq;
+			printk(KERN_ERR
+				"gpio-keys: "
+				"Unable to get irq number for GPIO %d,"
+				"error %d\n",
+				button->gpio, error);
+			goto fail;
+		}
+
+		error = request_irq(irq, gpio_keys_isr,
+				    IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_RISING |
+					IRQF_TRIGGER_FALLING,
+				    button->desc ? button->desc : "gpio_keys",
+				    pdev);
 		if (error) {
-			printk(KERN_ERR "gpio-keys: unable to claim irq %d; error %d\n",
+			printk(KERN_ERR
+				"gpio-keys: Unable to claim irq %d; error %d\n",
 				irq, error);
 			goto fail;
 		}
 
+		if (button->wakeup)
+			wakeup = 1;
+
 		input_set_capability(input, type, button->code);
 	}
 
 	error = input_register_device(input);
 	if (error) {
-		printk(KERN_ERR "Unable to register gpio-keys input device\n");
+		printk(KERN_ERR
+			"gpio-keys: Unable to register input device, "
+			"error: %d\n", error);
 		goto fail;
 	}
 
+	device_init_wakeup(&pdev->dev, wakeup);
+
 	return 0;
 
  fail:
-	for (i = i - 1; i >= 0; i--)
+	while (--i >= 0)
 		free_irq(gpio_to_irq(pdata->buttons[i].gpio), pdev);
 
+	platform_set_drvdata(pdev, NULL);
 	input_free_device(input);
 
 	return error;
@@ -113,6 +134,8 @@
 	struct input_dev *input = platform_get_drvdata(pdev);
 	int i;
 
+	device_init_wakeup(&pdev->dev, 0);
+
 	for (i = 0; i < pdata->nbuttons; i++) {
 		int irq = gpio_to_irq(pdata->buttons[i].gpio);
 		free_irq(irq, pdev);
@@ -123,9 +146,53 @@
 	return 0;
 }
 
+
+#ifdef CONFIG_PM
+static int gpio_keys_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+	int i;
+
+	if (device_may_wakeup(&pdev->dev)) {
+		for (i = 0; i < pdata->nbuttons; i++) {
+			struct gpio_keys_button *button = &pdata->buttons[i];
+			if (button->wakeup) {
+				int irq = gpio_to_irq(button->gpio);
+				enable_irq_wake(irq);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int gpio_keys_resume(struct platform_device *pdev)
+{
+	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+	int i;
+
+	if (device_may_wakeup(&pdev->dev)) {
+		for (i = 0; i < pdata->nbuttons; i++) {
+			struct gpio_keys_button *button = &pdata->buttons[i];
+			if (button->wakeup) {
+				int irq = gpio_to_irq(button->gpio);
+				disable_irq_wake(irq);
+			}
+		}
+	}
+
+	return 0;
+}
+#else
+#define gpio_keys_suspend	NULL
+#define gpio_keys_resume	NULL
+#endif
+
 struct platform_driver gpio_keys_device_driver = {
 	.probe		= gpio_keys_probe,
 	.remove		= __devexit_p(gpio_keys_remove),
+	.suspend	= gpio_keys_suspend,
+	.resume		= gpio_keys_resume,
 	.driver		= {
 		.name	= "gpio-keys",
 	}
diff --git a/drivers/input/keyboard/jornada680_kbd.c b/drivers/input/keyboard/jornada680_kbd.c
new file mode 100644
index 0000000..bec1cf4
--- /dev/null
+++ b/drivers/input/keyboard/jornada680_kbd.c
@@ -0,0 +1,277 @@
+/*
+ * drivers/input/keyboard/jornada680_kbd.c
+ *
+ * HP Jornada 620/660/680/690 scan keyboard platform driver
+ *  Copyright (C) 2007  Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
+ *
+ * Based on hp680_keyb.c
+ *  Copyright (C) 2006 Paul Mundt
+ *  Copyright (C) 2005 Andriy Skulysh
+ * Split from drivers/input/keyboard/hp600_keyb.c
+ *  Copyright (C) 2000 Yaegashi Takeshi (hp6xx kbd scan routine and translation table)
+ *  Copyright (C) 2000 Niibe Yutaka (HP620 Keyb translation table)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/input-polldev.h>
+#include <linux/jiffies.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+
+#define PCCR 0xa4000104
+#define PDCR 0xa4000106
+#define PECR 0xa4000108
+#define PFCR 0xa400010a
+#define PCDR 0xa4000124
+#define PDDR 0xa4000126
+#define PEDR 0xa4000128
+#define PFDR 0xa400012a
+#define PGDR 0xa400012c
+#define PHDR 0xa400012e
+#define PJDR 0xa4000130
+#define PKDR 0xa4000132
+#define PLDR 0xa4000134
+
+static const unsigned short jornada_scancodes[] = {
+/* PTD1 */	KEY_CAPSLOCK, KEY_MACRO, KEY_LEFTCTRL, 0, KEY_ESC, 0, 0, 0,	/*  1  -> 8   */
+		KEY_F1, KEY_F2, KEY_F3, KEY_F8, KEY_F7, KEY_F2, KEY_F4, KEY_F5,	/*  9  -> 16  */
+/* PTD5 */	KEY_SLASH, KEY_APOSTROPHE, KEY_ENTER, 0, KEY_Z, 0, 0, 0,	/*  17 -> 24  */
+		KEY_X, KEY_C, KEY_V, KEY_DOT, KEY_COMMA, KEY_M, KEY_B, KEY_N,	/*  25 -> 32  */
+/* PTD7 */	KEY_KP2, KEY_KP6, 0, 0, 0, 0, 0, 0,				/*  33 -> 40  */
+		0, 0, 0, KEY_KP4, 0, 0, KEY_LEFTALT, KEY_HANJA,			/*  41 -> 48  */
+/* PTE0 */	0, 0, 0, 0, KEY_FINANCE, 0, 0, 0,				/*  49 -> 56  */
+		KEY_LEFTCTRL, 0, KEY_SPACE, KEY_KPDOT, KEY_VOLUMEUP, 249, 0, 0, /*  57 -> 64  */
+/* PTE1 */	KEY_SEMICOLON, KEY_RIGHTBRACE, KEY_BACKSLASH, 0, KEY_A, 0, 0, 0,/*  65 -> 72  */
+		KEY_S, KEY_D, KEY_F, KEY_L, KEY_K, KEY_J, KEY_G, KEY_H,		/*  73 -> 80  */
+/* PTE3 */	KEY_KP8, KEY_LEFTMETA, KEY_RIGHTSHIFT, 0, KEY_TAB, 0, 0,0,	/*  81 -> 88  */
+		0, KEY_LEFTSHIFT, 0, 0, 0, 0, 0, 0,				/*  89 -> 96  */
+/* PTE6 */	KEY_P, KEY_LEFTBRACE, KEY_BACKSPACE, 0, KEY_Q, 0, 0, 0,		/*  97 -> 104 */
+		KEY_W, KEY_E, KEY_R, KEY_O, KEY_I, KEY_U, KEY_T, KEY_R,		/* 105 -> 112 */
+/* PTE7 */	KEY_0, KEY_MINUS, KEY_EQUAL, 0, KEY_1, 0, 0, 0,			/* 113 -> 120 */
+		KEY_2, KEY_3, KEY_4, KEY_9, KEY_8, KEY_7, KEY_5, KEY_6,		/* 121 -> 128 */
+/* **** */	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0
+};
+
+#define JORNADA_SCAN_SIZE	18
+
+struct jornadakbd {
+	struct input_polled_dev *poll_dev;
+	unsigned short keymap[ARRAY_SIZE(jornada_scancodes)];
+	unsigned char length;
+	unsigned char old_scan[JORNADA_SCAN_SIZE];
+	unsigned char new_scan[JORNADA_SCAN_SIZE];
+};
+
+static void jornada_parse_kbd(struct jornadakbd *jornadakbd)
+{
+	struct input_dev *input_dev = jornadakbd->poll_dev->input;
+	unsigned short *keymap = jornadakbd->keymap;
+	unsigned int sync_me = 0;
+	unsigned int i, j;
+
+	for (i = 0; i < JORNADA_SCAN_SIZE; i++) {
+		unsigned char new = jornadakbd->new_scan[i];
+		unsigned char old = jornadakbd->old_scan[i];
+		unsigned int xor = new ^ old;
+
+		if (xor == 0)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			unsigned int bit = 1 << j;
+			if (xor & bit) {
+				unsigned int scancode = (i << 3) + j;
+				input_event(input_dev,
+					    EV_MSC, MSC_SCAN, scancode);
+				input_report_key(input_dev,
+						 keymap[scancode],
+						 !(new & bit));
+				sync_me = 1;
+			}
+		}
+	}
+
+	if (sync_me)
+	    input_sync(input_dev);
+}
+
+static void jornada_scan_keyb(unsigned char *s)
+{
+	int i;
+	unsigned short ec_static, dc_static; /* = UINT16_t */
+	unsigned char matrix_switch[] = {
+		0xfd, 0xff,   /* PTD1 PD(1) */
+		0xdf, 0xff,   /* PTD5 PD(5) */
+		0x7f, 0xff,   /* PTD7 PD(7) */
+		0xff, 0xfe,   /* PTE0 PE(0) */
+		0xff, 0xfd,   /* PTE1 PE(1) */
+		0xff, 0xf7,   /* PTE3 PE(3) */
+		0xff, 0xbf,   /* PTE6 PE(6) */
+		0xff, 0x7f,   /* PTE7 PE(7) */
+	}, *t = matrix_switch;
+	/* PD(x) :
+	1.   0xcc0c & (1~(1 << (2*(x)+1)))))
+	2.   (0xf0cf & 0xfffff) */
+	/* PE(x) :
+	1.   0xcc0c & 0xffff
+	2.   0xf0cf & (1~(1 << (2*(x)+1))))) */
+	unsigned short matrix_PDE[] = {
+		0xcc04, 0xf0cf,  /* PD(1) */
+		0xc40c, 0xf0cf,	 /* PD(5) */
+		0x4c0c, 0xf0cf,  /* PD(7) */
+		0xcc0c, 0xf0cd,  /* PE(0) */
+		0xcc0c, 0xf0c7,	 /* PE(1) */
+		0xcc0c, 0xf04f,  /* PE(3) */
+		0xcc0c, 0xd0cf,	 /* PE(6) */
+		0xcc0c, 0x70cf,	 /* PE(7) */
+	}, *y = matrix_PDE;
+
+	/* Save these control reg bits */
+	dc_static = (ctrl_inw(PDCR) & (~0xcc0c));
+	ec_static = (ctrl_inw(PECR) & (~0xf0cf));
+
+	for (i = 0; i < 8; i++) {
+		/* disable output for all but the one we want to scan */
+		ctrl_outw((dc_static | *y++), PDCR);
+		ctrl_outw((ec_static | *y++), PECR);
+		udelay(5);
+
+		/* Get scanline row */
+		ctrl_outb(*t++, PDDR);
+		ctrl_outb(*t++, PEDR);
+		udelay(50);
+
+		/* Read data */
+		*s++ = ctrl_inb(PCDR);
+		*s++ = ctrl_inb(PFDR);
+	}
+	/* Scan no lines */
+	ctrl_outb(0xff, PDDR);
+	ctrl_outb(0xff, PEDR);
+
+	/* Enable all scanlines */
+	ctrl_outw((dc_static | (0x5555 & 0xcc0c)),PDCR);
+	ctrl_outw((ec_static | (0x5555 & 0xf0cf)),PECR);
+
+	/* Ignore extra keys and events */
+	*s++ = ctrl_inb(PGDR);
+	*s++ = ctrl_inb(PHDR);
+}
+
+static void jornadakbd680_poll(struct input_polled_dev *dev)
+{
+	struct jornadakbd *jornadakbd = dev->private;
+
+	jornada_scan_keyb(jornadakbd->new_scan);
+	jornada_parse_kbd(jornadakbd);
+	memcpy(jornadakbd->old_scan, jornadakbd->new_scan, JORNADA_SCAN_SIZE);
+}
+
+static int __devinit jornada680kbd_probe(struct platform_device *pdev)
+{
+	struct jornadakbd *jornadakbd;
+	struct input_polled_dev *poll_dev;
+	struct input_dev *input_dev;
+	int i, error;
+
+	jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL);
+	if (!jornadakbd)
+		return -ENOMEM;
+
+	poll_dev = input_allocate_polled_device();
+	if (!poll_dev) {
+		error = -ENOMEM;
+		goto failed;
+	}
+
+	platform_set_drvdata(pdev, jornadakbd);
+
+	jornadakbd->poll_dev = poll_dev;
+
+	memcpy(jornadakbd->keymap, jornada_scancodes,
+		sizeof(jornadakbd->keymap));
+
+	poll_dev->private = jornadakbd;
+	poll_dev->poll = jornadakbd680_poll;
+	poll_dev->poll_interval = 50; /* msec */
+
+	input_dev = poll_dev->input;
+	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->name = "HP Jornada 680 keyboard";
+	input_dev->phys = "jornadakbd/input0";
+	input_dev->keycode = jornadakbd->keymap;
+	input_dev->keycodesize = sizeof(unsigned short);
+	input_dev->keycodemax = ARRAY_SIZE(jornada_scancodes);
+	input_dev->dev.parent = &pdev->dev;
+	input_dev->id.bustype = BUS_HOST;
+
+	for (i = 0; i < 128; i++)
+		if (jornadakbd->keymap[i])
+			__set_bit(jornadakbd->keymap[i], input_dev->keybit);
+	__clear_bit(KEY_RESERVED, input_dev->keybit);
+
+	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+
+	error = input_register_polled_device(jornadakbd->poll_dev);
+	if (error)
+		goto failed;
+
+	return 0;
+
+ failed:
+	printk(KERN_ERR "Jornadakbd: failed to register driver, error: %d\n",
+		error);
+	platform_set_drvdata(pdev, NULL);
+	input_free_polled_device(poll_dev);
+	kfree(jornadakbd);
+	return error;
+
+}
+
+static int __devexit jornada680kbd_remove(struct platform_device *pdev)
+{
+	struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
+
+	platform_set_drvdata(pdev, NULL);
+	input_unregister_polled_device(jornadakbd->poll_dev);
+	input_free_polled_device(jornadakbd->poll_dev);
+	kfree(jornadakbd);
+
+	return 0;
+}
+
+static struct platform_driver jornada680kbd_driver = {
+	.driver	= {
+		.name	= "jornada680_kbd",
+	},
+	.probe	= jornada680kbd_probe,
+	.remove	= __devexit_p(jornada680kbd_remove),
+};
+
+static int __init jornada680kbd_init(void)
+{
+	return platform_driver_register(&jornada680kbd_driver);
+}
+
+static void __exit jornada680kbd_exit(void)
+{
+	platform_driver_unregister(&jornada680kbd_driver);
+}
+
+module_init(jornada680kbd_init);
+module_exit(jornada680kbd_exit);
+
+MODULE_AUTHOR("Kristoffer Ericson <kristoffer.ericson@gmail.com>");
+MODULE_DESCRIPTION("HP Jornada 620/660/680/690 Keyboard Driver");
+MODULE_LICENSE("GPLv2");
diff --git a/drivers/input/keyboard/jornada720_kbd.c b/drivers/input/keyboard/jornada720_kbd.c
new file mode 100644
index 0000000..e6696b3
--- /dev/null
+++ b/drivers/input/keyboard/jornada720_kbd.c
@@ -0,0 +1,185 @@
+/*
+ * drivers/input/keyboard/jornada720_kbd.c
+ *
+ * HP Jornada 720 keyboard platform driver
+ *
+ * Copyright (C) 2006/2007 Kristoffer Ericson <Kristoffer.Ericson@Gmail.com>
+ *
+ *    Copyright (C) 2006 jornada 720 kbd driver by
+		Filip Zyzniewsk <Filip.Zyzniewski@tefnet.plX
+ *     based on (C) 2004 jornada 720 kbd driver by
+		Alex Lange <chicken@handhelds.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <asm/arch/jornada720.h>
+#include <asm/hardware.h>
+
+MODULE_AUTHOR("Kristoffer Ericson <Kristoffer.Ericson@gmail.com>");
+MODULE_DESCRIPTION("HP Jornada 710/720/728 keyboard driver");
+MODULE_LICENSE("GPLv2");
+
+static unsigned short jornada_std_keymap[128] = {					/* ROW */
+	0, KEY_ESC, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7,		/* #1  */
+	KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE,	/*  -> */
+	0, KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9,		/* #2  */
+	KEY_0, KEY_MINUS, KEY_EQUAL,0, 0, 0,						/*  -> */
+	0, KEY_Q, KEY_W, KEY_E, KEY_R, KEY_T, KEY_Y, KEY_U, KEY_I, KEY_O,		/* #3  */
+	KEY_P, KEY_BACKSLASH, KEY_BACKSPACE, 0, 0, 0,					/*  -> */
+	0, KEY_A, KEY_S, KEY_D, KEY_F, KEY_G, KEY_H, KEY_J, KEY_K, KEY_L,		/* #4  */
+	KEY_SEMICOLON, KEY_LEFTBRACE, KEY_RIGHTBRACE, 0, 0, 0,				/*  -> */
+	0, KEY_Z, KEY_X, KEY_C, KEY_V, KEY_B, KEY_N, KEY_M, KEY_COMMA,			/* #5  */
+	KEY_DOT, KEY_KPMINUS, KEY_APOSTROPHE, KEY_ENTER, 0, 0,0,			/*  -> */
+	0, KEY_TAB, 0, KEY_LEFTSHIFT, 0, KEY_APOSTROPHE, 0, 0, 0, 0,			/* #6  */
+	KEY_UP, 0, KEY_RIGHTSHIFT, 0, 0, 0,0, 0, 0, 0, 0, KEY_LEFTALT, KEY_GRAVE,	/*  -> */
+	0, 0, KEY_LEFT, KEY_DOWN, KEY_RIGHT, 0, 0, 0, 0,0, KEY_KPASTERISK,		/*  -> */
+	KEY_LEFTCTRL, 0, KEY_SPACE, 0, 0, 0, KEY_SLASH, KEY_DELETE, 0, 0,		/*  -> */
+	0, 0, 0, KEY_POWER,								/*  -> */
+};
+
+struct jornadakbd {
+	unsigned short keymap[ARRAY_SIZE(jornada_std_keymap)];
+	struct input_dev *input;
+};
+
+static irqreturn_t jornada720_kbd_interrupt(int irq, void *dev_id)
+{
+	struct platform_device *pdev = dev_id;
+	struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
+	struct input_dev *input = jornadakbd->input;
+	u8 count, kbd_data, scan_code;
+
+	/* startup ssp with spinlock */
+	jornada_ssp_start();
+
+	if (jornada_ssp_inout(GETSCANKEYCODE) != TXDUMMY) {
+		printk(KERN_DEBUG
+			"jornada720_kbd: "
+			"GetKeycode command failed with ETIMEDOUT, "
+			"flushed bus\n");
+	} else {
+		/* How many keycodes are waiting for us? */
+		count = jornada_ssp_byte(TXDUMMY);
+
+		/* Lets drag them out one at a time */
+		while (count--) {
+			/* Exchange TxDummy for location (keymap[kbddata]) */
+			kbd_data = jornada_ssp_byte(TXDUMMY);
+			scan_code = kbd_data & 0x7f;
+
+			input_event(input, EV_MSC, MSC_SCAN, scan_code);
+			input_report_key(input, jornadakbd->keymap[scan_code],
+					 !(kbd_data & 0x80));
+			input_sync(input);
+		}
+	}
+
+	/* release spinlock and turn off ssp */
+	jornada_ssp_end();
+
+	return IRQ_HANDLED;
+};
+
+static int __devinit jornada720_kbd_probe(struct platform_device *pdev)
+{
+	struct jornadakbd *jornadakbd;
+	struct input_dev *input_dev;
+	int i, err;
+
+	jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!jornadakbd || !input_dev) {
+		err = -ENOMEM;
+		goto fail1;
+	}
+
+	platform_set_drvdata(pdev, jornadakbd);
+
+	memcpy(jornadakbd->keymap, jornada_std_keymap,
+		sizeof(jornada_std_keymap));
+	jornadakbd->input = input_dev;
+
+	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->name = "HP Jornada 720 keyboard";
+	input_dev->phys = "jornadakbd/input0";
+	input_dev->keycode = jornadakbd->keymap;
+	input_dev->keycodesize = sizeof(unsigned short);
+	input_dev->keycodemax = ARRAY_SIZE(jornada_std_keymap);
+	input_dev->id.bustype = BUS_HOST;
+	input_dev->dev.parent = &pdev->dev;
+
+	for (i = 0; i < ARRAY_SIZE(jornadakbd->keymap); i++)
+		__set_bit(jornadakbd->keymap[i], input_dev->keybit);
+	__clear_bit(KEY_RESERVED, input_dev->keybit);
+
+	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+
+	err = request_irq(IRQ_GPIO0,
+			  jornada720_kbd_interrupt,
+			  IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+			  "jornadakbd", pdev);
+	if (err) {
+		printk(KERN_INFO "jornadakbd720_kbd: Unable to grab IRQ\n");
+		goto fail1;
+	}
+
+	err = input_register_device(jornadakbd->input);
+	if (err)
+		goto fail2;
+
+	return 0;
+
+ fail2:	/* IRQ, DEVICE, MEMORY */
+	free_irq(IRQ_GPIO0, pdev);
+ fail1:	/* DEVICE, MEMORY */
+	platform_set_drvdata(pdev, NULL);
+	input_free_device(input_dev);
+	kfree(jornadakbd);
+	return err;
+};
+
+static int __devexit jornada720_kbd_remove(struct platform_device *pdev)
+{
+	struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
+
+	free_irq(IRQ_GPIO0, pdev);
+	platform_set_drvdata(pdev, NULL);
+	input_unregister_device(jornadakbd->input);
+	kfree(jornadakbd);
+
+	return 0;
+}
+
+static struct platform_driver jornada720_kbd_driver = {
+	.driver  = {
+		.name    = "jornada720_kbd",
+	 },
+	.probe   = jornada720_kbd_probe,
+	.remove  = __devexit_p(jornada720_kbd_remove),
+};
+
+static int __init jornada720_kbd_init(void)
+{
+	return platform_driver_register(&jornada720_kbd_driver);
+}
+
+static void __exit jornada720_kbd_exit(void)
+{
+	platform_driver_unregister(&jornada720_kbd_driver);
+}
+
+module_init(jornada720_kbd_init);
+module_exit(jornada720_kbd_exit);
diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c
new file mode 100644
index 0000000..2b40428
--- /dev/null
+++ b/drivers/input/keyboard/maple_keyb.c
@@ -0,0 +1,252 @@
+/*
+ * SEGA Dreamcast keyboard driver
+ * Based on drivers/usb/usbkbd.c
+ * Copyright YAEGASHI Takeshi, 2001
+ * Porting to 2.6 Copyright Adrian McMenamin, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/timer.h>
+#include <linux/maple.h>
+#include <asm/mach/maple.h>
+
+/* Very simple mutex to ensure proper cleanup */
+static DEFINE_MUTEX(maple_keyb_mutex);
+
+#define NR_SCANCODES 256
+
+MODULE_AUTHOR("YAEGASHI Takeshi, Adrian McMenamin");
+MODULE_DESCRIPTION("SEGA Dreamcast keyboard driver");
+MODULE_LICENSE("GPL");
+
+struct dc_kbd {
+	struct input_dev *dev;
+	unsigned short keycode[NR_SCANCODES];
+	unsigned char new[8];
+	unsigned char old[8];
+};
+
+static const unsigned short dc_kbd_keycode[NR_SCANCODES] = {
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_A, KEY_B, KEY_C, KEY_D,
+	KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J, KEY_K, KEY_L,
+	KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T,
+	KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z, KEY_1, KEY_2,
+	KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0,
+	KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS, KEY_EQUAL, KEY_LEFTBRACE,
+	KEY_RIGHTBRACE, KEY_BACKSLASH, KEY_BACKSLASH, KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA,
+	KEY_DOT, KEY_SLASH, KEY_CAPSLOCK, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6,
+	KEY_F7, KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_F12, KEY_SYSRQ,
+	KEY_SCROLLLOCK, KEY_PAUSE, KEY_INSERT, KEY_HOME, KEY_PAGEUP, KEY_DELETE,
+	KEY_END, KEY_PAGEDOWN, KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP,
+	KEY_NUMLOCK, KEY_KPSLASH, KEY_KPASTERISK, KEY_KPMINUS, KEY_KPPLUS, KEY_KPENTER, KEY_KP1, KEY_KP2,
+	KEY_KP3, KEY_KP4, KEY_KP5, KEY_KP6, KEY_KP7, KEY_KP8, KEY_KP9, KEY_KP0, KEY_KPDOT,
+	KEY_102ND, KEY_COMPOSE, KEY_POWER, KEY_KPEQUAL, KEY_F13, KEY_F14, KEY_F15,
+	KEY_F16, KEY_F17, KEY_F18, KEY_F19, KEY_F20,
+	KEY_F21, KEY_F22, KEY_F23, KEY_F24, KEY_OPEN, KEY_HELP, KEY_PROPS, KEY_FRONT,
+	KEY_STOP, KEY_AGAIN, KEY_UNDO, KEY_CUT, KEY_COPY, KEY_PASTE, KEY_FIND, KEY_MUTE,
+	KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_KPCOMMA, KEY_RESERVED, KEY_RO, KEY_KATAKANAHIRAGANA , KEY_YEN,
+	KEY_HENKAN, KEY_MUHENKAN, KEY_KPJPCOMMA, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_HANGEUL, KEY_HANJA, KEY_KATAKANA, KEY_HIRAGANA, KEY_ZENKAKUHANKAKU, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+	KEY_LEFTCTRL, KEY_LEFTSHIFT, KEY_LEFTALT, KEY_LEFTMETA, KEY_RIGHTCTRL, KEY_RIGHTSHIFT, KEY_RIGHTALT, KEY_RIGHTMETA,
+	KEY_PLAYPAUSE, KEY_STOPCD, KEY_PREVIOUSSONG, KEY_NEXTSONG, KEY_EJECTCD, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE,
+	KEY_WWW, KEY_BACK, KEY_FORWARD, KEY_STOP, KEY_FIND, KEY_SCROLLUP, KEY_SCROLLDOWN, KEY_EDIT, KEY_SLEEP,
+	KEY_SCREENLOCK, KEY_REFRESH, KEY_CALC, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED
+};
+
+static void dc_scan_kbd(struct dc_kbd *kbd)
+{
+	struct input_dev *dev = kbd->dev;
+	void *ptr;
+	int code, keycode;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		code = i + 224;
+		keycode = kbd->keycode[code];
+		input_event(dev, EV_MSC, MSC_SCAN, code);
+		input_report_key(dev, keycode, (kbd->new[0] >> i) & 1);
+	}
+
+	for (i = 2; i < 8; i++) {
+		ptr = memchr(kbd->new + 2, kbd->old[i], 6);
+		code = kbd->old[i];
+		if (code > 3 && ptr == NULL) {
+			keycode = kbd->keycode[code];
+			if (keycode) {
+				input_event(dev, EV_MSC, MSC_SCAN, code);
+				input_report_key(dev, keycode, 0);
+			} else
+				printk(KERN_DEBUG "maple_keyb: "
+					"Unknown key (scancode %#x) released.",
+					code);
+		}
+		ptr = memchr(kbd->old + 2, kbd->new[i], 6);
+		code = kbd->new[i];
+		if (code > 3 && ptr) {
+			keycode = kbd->keycode[code];
+			if (keycode) {
+				input_event(dev, EV_MSC, MSC_SCAN, code);
+				input_report_key(dev, keycode, 1);
+			} else
+				printk(KERN_DEBUG "maple_keyb: "
+					"Unknown key (scancode %#x) pressed.",
+					code);
+		}
+	}
+	input_sync(dev);
+	memcpy(kbd->old, kbd->new, 8);
+}
+
+static void dc_kbd_callback(struct mapleq *mq)
+{
+	struct maple_device *mapledev = mq->dev;
+	struct dc_kbd *kbd = mapledev->private_data;
+	unsigned long *buf = mq->recvbuf;
+
+	/*
+	 * We should always be getting the lock because the only
+	 * time it may be locked if driver is in cleanup phase.
+	 */
+	if (likely(mutex_trylock(&maple_keyb_mutex))) {
+
+		if (buf[1] == mapledev->function) {
+			memcpy(kbd->new, buf + 2, 8);
+			dc_scan_kbd(kbd);
+		}
+
+		mutex_unlock(&maple_keyb_mutex);
+	}
+}
+
+static int dc_kbd_connect(struct maple_device *mdev)
+{
+	int i, error;
+	struct dc_kbd *kbd;
+	struct input_dev *dev;
+
+	if (!(mdev->function & MAPLE_FUNC_KEYBOARD))
+		return -EINVAL;
+
+	kbd = kzalloc(sizeof(struct dc_kbd), GFP_KERNEL);
+	dev = input_allocate_device();
+	if (!kbd || !dev) {
+		error = -ENOMEM;
+		goto fail;
+	}
+
+	mdev->private_data = kbd;
+
+	kbd->dev = dev;
+	memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode));
+
+	dev->name = mdev->product_name;
+	dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	dev->keycode = kbd->keycode;
+	dev->keycodesize = sizeof (unsigned short);
+	dev->keycodemax = ARRAY_SIZE(kbd->keycode);
+	dev->id.bustype = BUS_HOST;
+	dev->dev.parent = &mdev->dev;
+
+	for (i = 0; i < NR_SCANCODES; i++)
+		__set_bit(dc_kbd_keycode[i], dev->keybit);
+	__clear_bit(KEY_RESERVED, dev->keybit);
+
+	input_set_capability(dev, EV_MSC, MSC_SCAN);
+	input_set_drvdata(dev, kbd);
+
+	error = input_register_device(dev);
+	if (error)
+		goto fail;
+
+	/* Maple polling is locked to VBLANK - which may be just 50/s */
+	maple_getcond_callback(mdev, dc_kbd_callback, HZ/50, MAPLE_FUNC_KEYBOARD);
+	return 0;
+
+ fail:
+	input_free_device(dev);
+	kfree(kbd);
+	mdev->private_data = NULL;
+	return error;
+}
+
+static void dc_kbd_disconnect(struct maple_device *mdev)
+{
+	struct dc_kbd *kbd;
+
+	mutex_lock(&maple_keyb_mutex);
+
+	kbd = mdev->private_data;
+	mdev->private_data = NULL;
+	input_unregister_device(kbd->dev);
+	kfree(kbd);
+
+	mutex_unlock(&maple_keyb_mutex);
+}
+
+/* allow the keyboard to be used */
+static int probe_maple_kbd(struct device *dev)
+{
+	struct maple_device *mdev = to_maple_dev(dev);
+	struct maple_driver *mdrv = to_maple_driver(dev->driver);
+	int error;
+
+	error = dc_kbd_connect(mdev);
+	if (error)
+		return error;
+
+	mdev->driver = mdrv;
+	mdev->registered = 1;
+
+	return 0;
+}
+
+static struct maple_driver dc_kbd_driver = {
+	.function = MAPLE_FUNC_KEYBOARD,
+	.connect = dc_kbd_connect,
+	.disconnect = dc_kbd_disconnect,
+	.drv = {
+		.name = "Dreamcast_keyboard",
+		.probe = probe_maple_kbd,
+       },
+};
+
+static int __init dc_kbd_init(void)
+{
+	return maple_driver_register(&dc_kbd_driver.drv);
+}
+
+static void __exit dc_kbd_exit(void)
+{
+	driver_unregister(&dc_kbd_driver.drv);
+}
+
+module_init(dc_kbd_init);
+module_exit(dc_kbd_exit);
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c
index 3a22863..76f1969 100644
--- a/drivers/input/keyboard/omap-keypad.c
+++ b/drivers/input/keyboard/omap-keypad.c
@@ -233,7 +233,7 @@
 			omap_writew(0, OMAP_MPUIO_BASE + OMAP_MPUIO_KBD_MASKIT);
 			kp_cur_group = -1;
 		}
- 	}
+	}
 }
 
 static ssize_t omap_kp_enable_show(struct device *dev,
@@ -318,7 +318,7 @@
 	keymap = pdata->keymap;
 
 	if (pdata->rep)
-		set_bit(EV_REP, input_dev->evbit);
+		__set_bit(EV_REP, input_dev->evbit);
 
 	if (pdata->delay)
 		omap_kp->delay = pdata->delay;
@@ -365,9 +365,9 @@
 		goto err2;
 
 	/* setup input device */
-	set_bit(EV_KEY, input_dev->evbit);
+	__set_bit(EV_KEY, input_dev->evbit);
 	for (i = 0; keymap[i] != 0; i++)
-		set_bit(keymap[i] & KEY_MAX, input_dev->keybit);
+		__set_bit(keymap[i] & KEY_MAX, input_dev->keybit);
 	input_dev->name = "omap-keypad";
 	input_dev->phys = "omap-keypad/input0";
 	input_dev->dev.parent = &pdev->dev;
@@ -377,10 +377,6 @@
 	input_dev->id.product = 0x0001;
 	input_dev->id.version = 0x0100;
 
-	input_dev->keycode = keymap;
-	input_dev->keycodesize = sizeof(unsigned int);
-	input_dev->keycodemax = pdata->keymapsize;
-
 	ret = input_register_device(omap_kp->input);
 	if (ret < 0) {
 		printk(KERN_ERR "Unable to register omap-keypad input device\n");
@@ -403,15 +399,15 @@
 	} else {
 		for (irq_idx = 0; irq_idx < omap_kp->rows; irq_idx++) {
 			if (request_irq(OMAP_GPIO_IRQ(row_gpios[irq_idx]),
-				       	omap_kp_interrupt,
+					omap_kp_interrupt,
 					IRQF_TRIGGER_FALLING,
-				       	"omap-keypad", omap_kp) < 0)
+					"omap-keypad", omap_kp) < 0)
 				goto err5;
 		}
 	}
 	return 0;
 err5:
-	for (i = irq_idx-1; i >=0; i--)
+	for (i = irq_idx - 1; i >=0; i--)
 		free_irq(row_gpios[i], 0);
 err4:
 	input_unregister_device(omap_kp->input);
@@ -440,9 +436,9 @@
 	if (cpu_is_omap24xx()) {
 		int i;
 		for (i = 0; i < omap_kp->cols; i++)
-	    		omap_free_gpio(col_gpios[i]);
+			omap_free_gpio(col_gpios[i]);
 		for (i = 0; i < omap_kp->rows; i++) {
-	    		omap_free_gpio(row_gpios[i]);
+			omap_free_gpio(row_gpios[i]);
 			free_irq(OMAP_GPIO_IRQ(row_gpios[i]), 0);
 		}
 	} else {
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 2c5f11a..64d70a9 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -48,11 +48,13 @@
 	{ { 0x63, 0x02, 0x50 },	0xef, 0xef, ALPS_FW_BK_1 },		/* NEC Versa L320 */
 	{ { 0x63, 0x02, 0x64 },	0xf8, 0xf8, 0 },
 	{ { 0x63, 0x03, 0xc8 }, 0xf8, 0xf8, ALPS_PASS },		/* Dell Latitude D800 */
+	{ { 0x73, 0x00, 0x0a },	0xf8, 0xf8, ALPS_DUALPOINT },		/* ThinkPad R61 8918-5QG */
 	{ { 0x73, 0x02, 0x0a },	0xf8, 0xf8, 0 },
 	{ { 0x73, 0x02, 0x14 },	0xf8, 0xf8, ALPS_FW_BK_2 },		/* Ahtec Laptop */
 	{ { 0x20, 0x02, 0x0e },	0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* XXX */
 	{ { 0x22, 0x02, 0x0a },	0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT },
 	{ { 0x22, 0x02, 0x14 }, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude D600 */
+	{ { 0x73, 0x02, 0x50 }, 0xcf, 0xff, ALPS_FW_BK_1 } /* Dell Vostro 1400 */
 };
 
 /*
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index a1804bf..0117817 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -502,18 +502,23 @@
 
 		/* reset the accumulator on release */
 		memset(dev->xy_acc, 0, sizeof(dev->xy_acc));
+	}
 
-		/* Geyser 3 will continue to send packets continually after
-		   the first touch unless reinitialised. Do so if it's been
-		   idle for a while in order to avoid waking the kernel up
-		   several hundred times a second */
-		if (!key && atp_is_geyser_3(dev)) {
+	/* Geyser 3 will continue to send packets continually after
+	   the first touch unless reinitialised. Do so if it's been
+	   idle for a while in order to avoid waking the kernel up
+	   several hundred times a second */
+
+	if (atp_is_geyser_3(dev)) {
+		if (!x && !y && !key) {
 			dev->idlecount++;
 			if (dev->idlecount == 10) {
 				dev->valid = 0;
 				schedule_work(&dev->work);
 			}
 		}
+		else
+			dev->idlecount = 0;
 	}
 
 	input_report_key(dev->input, BTN_LEFT, key);
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index 608674d..d7de4c5 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -97,6 +97,14 @@
 		.callback = lifebook_set_6byte_proto,
 	},
 	{
+		.ident = "CF-72",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "CF-72"),
+		},
+		.callback = lifebook_set_serio_phys,
+		.driver_data = "isa0060/serio3",
+	},
+	{
 		.ident = "Lifebook B142",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook B142"),
@@ -282,7 +290,7 @@
 int lifebook_init(struct psmouse *psmouse)
 {
 	struct input_dev *dev1 = psmouse->dev;
-	int max_coord = lifebook_use_6byte_proto ? 1024 : 4096;
+	int max_coord = lifebook_use_6byte_proto ? 4096 : 1024;
 
 	if (lifebook_absolute_mode(psmouse))
 		return -1;
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index b9f0fb2..0735257 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -648,9 +648,10 @@
 
 /*
  * Reset to defaults in case the device got confused by extended
- * protocol probes. Note that we do full reset becuase some mice
- * put themselves to sleep when see PSMOUSE_RESET_DIS.
+ * protocol probes. Note that we follow up with full reset because
+ * some mice put themselves to sleep when they see PSMOUSE_RESET_DIS.
  */
+	ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS);
 	psmouse_reset(psmouse);
 
 	if (max_proto >= PSMOUSE_IMEX && im_explorer_detect(psmouse, set_properties) == 0)
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index 9173916..79146d6 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -61,9 +61,11 @@
 	int open;
 	int minor;
 	char name[16];
+	struct input_handle handle;
 	wait_queue_head_t wait;
 	struct list_head client_list;
-	struct input_handle handle;
+	spinlock_t client_lock; /* protects client_list */
+	struct mutex mutex;
 	struct device dev;
 
 	struct list_head mixdev_node;
@@ -113,108 +115,137 @@
 static struct input_handler mousedev_handler;
 
 static struct mousedev *mousedev_table[MOUSEDEV_MINORS];
+static DEFINE_MUTEX(mousedev_table_mutex);
 static struct mousedev *mousedev_mix;
 static LIST_HEAD(mousedev_mix_list);
 
+static void mixdev_open_devices(void);
+static void mixdev_close_devices(void);
+
 #define fx(i)  (mousedev->old_x[(mousedev->pkt_count - (i)) & 03])
 #define fy(i)  (mousedev->old_y[(mousedev->pkt_count - (i)) & 03])
 
-static void mousedev_touchpad_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_touchpad_event(struct input_dev *dev,
+				    struct mousedev *mousedev,
+				    unsigned int code, int value)
 {
 	int size, tmp;
 	enum { FRACTION_DENOM = 128 };
 
 	switch (code) {
-		case ABS_X:
-			fx(0) = value;
-			if (mousedev->touch && mousedev->pkt_count >= 2) {
-				size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
-				if (size == 0)
-					size = 256 * 2;
-				tmp = ((value - fx(2)) * (256 * FRACTION_DENOM)) / size;
-				tmp += mousedev->frac_dx;
-				mousedev->packet.dx = tmp / FRACTION_DENOM;
-				mousedev->frac_dx = tmp - mousedev->packet.dx * FRACTION_DENOM;
-			}
-			break;
 
-		case ABS_Y:
-			fy(0) = value;
-			if (mousedev->touch && mousedev->pkt_count >= 2) {
-				/* use X size to keep the same scale */
-				size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
-				if (size == 0)
-					size = 256 * 2;
-				tmp = -((value - fy(2)) * (256 * FRACTION_DENOM)) / size;
-				tmp += mousedev->frac_dy;
-				mousedev->packet.dy = tmp / FRACTION_DENOM;
-				mousedev->frac_dy = tmp - mousedev->packet.dy * FRACTION_DENOM;
-			}
-			break;
+	case ABS_X:
+		fx(0) = value;
+		if (mousedev->touch && mousedev->pkt_count >= 2) {
+			size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
+			if (size == 0)
+				size = 256 * 2;
+			tmp = ((value - fx(2)) * 256 * FRACTION_DENOM) / size;
+			tmp += mousedev->frac_dx;
+			mousedev->packet.dx = tmp / FRACTION_DENOM;
+			mousedev->frac_dx =
+				tmp - mousedev->packet.dx * FRACTION_DENOM;
+		}
+		break;
+
+	case ABS_Y:
+		fy(0) = value;
+		if (mousedev->touch && mousedev->pkt_count >= 2) {
+			/* use X size to keep the same scale */
+			size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
+			if (size == 0)
+				size = 256 * 2;
+			tmp = -((value - fy(2)) * 256 * FRACTION_DENOM) / size;
+			tmp += mousedev->frac_dy;
+			mousedev->packet.dy = tmp / FRACTION_DENOM;
+			mousedev->frac_dy = tmp -
+				mousedev->packet.dy * FRACTION_DENOM;
+		}
+		break;
 	}
 }
 
-static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev,
+				unsigned int code, int value)
 {
 	int size;
 
 	switch (code) {
-		case ABS_X:
-			size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
-			if (size == 0)
-				size = xres ? : 1;
-			if (value > dev->absmax[ABS_X])
-				value = dev->absmax[ABS_X];
-			if (value < dev->absmin[ABS_X])
-				value = dev->absmin[ABS_X];
-			mousedev->packet.x = ((value - dev->absmin[ABS_X]) * xres) / size;
-			mousedev->packet.abs_event = 1;
-			break;
 
-		case ABS_Y:
-			size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y];
-			if (size == 0)
-				size = yres ? : 1;
-			if (value > dev->absmax[ABS_Y])
-				value = dev->absmax[ABS_Y];
-			if (value < dev->absmin[ABS_Y])
-				value = dev->absmin[ABS_Y];
-			mousedev->packet.y = yres - ((value - dev->absmin[ABS_Y]) * yres) / size;
-			mousedev->packet.abs_event = 1;
-			break;
+	case ABS_X:
+		size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
+		if (size == 0)
+			size = xres ? : 1;
+		if (value > dev->absmax[ABS_X])
+			value = dev->absmax[ABS_X];
+		if (value < dev->absmin[ABS_X])
+			value = dev->absmin[ABS_X];
+		mousedev->packet.x =
+			((value - dev->absmin[ABS_X]) * xres) / size;
+		mousedev->packet.abs_event = 1;
+		break;
+
+	case ABS_Y:
+		size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y];
+		if (size == 0)
+			size = yres ? : 1;
+		if (value > dev->absmax[ABS_Y])
+			value = dev->absmax[ABS_Y];
+		if (value < dev->absmin[ABS_Y])
+			value = dev->absmin[ABS_Y];
+		mousedev->packet.y = yres -
+			((value - dev->absmin[ABS_Y]) * yres) / size;
+		mousedev->packet.abs_event = 1;
+		break;
 	}
 }
 
-static void mousedev_rel_event(struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_rel_event(struct mousedev *mousedev,
+				unsigned int code, int value)
 {
 	switch (code) {
-		case REL_X:	mousedev->packet.dx += value; break;
-		case REL_Y:	mousedev->packet.dy -= value; break;
-		case REL_WHEEL:	mousedev->packet.dz -= value; break;
+	case REL_X:
+		mousedev->packet.dx += value;
+		break;
+
+	case REL_Y:
+		mousedev->packet.dy -= value;
+		break;
+
+	case REL_WHEEL:
+		mousedev->packet.dz -= value;
+		break;
 	}
 }
 
-static void mousedev_key_event(struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_key_event(struct mousedev *mousedev,
+				unsigned int code, int value)
 {
 	int index;
 
 	switch (code) {
-		case BTN_TOUCH:
-		case BTN_0:
-		case BTN_LEFT:		index = 0; break;
-		case BTN_STYLUS:
-		case BTN_1:
-		case BTN_RIGHT:		index = 1; break;
-		case BTN_2:
-		case BTN_FORWARD:
-		case BTN_STYLUS2:
-		case BTN_MIDDLE:	index = 2; break;
-		case BTN_3:
-		case BTN_BACK:
-		case BTN_SIDE:		index = 3; break;
-		case BTN_4:
-		case BTN_EXTRA:		index = 4; break;
-		default:		return;
+
+	case BTN_TOUCH:
+	case BTN_0:
+	case BTN_LEFT:		index = 0; break;
+
+	case BTN_STYLUS:
+	case BTN_1:
+	case BTN_RIGHT:		index = 1; break;
+
+	case BTN_2:
+	case BTN_FORWARD:
+	case BTN_STYLUS2:
+	case BTN_MIDDLE:	index = 2; break;
+
+	case BTN_3:
+	case BTN_BACK:
+	case BTN_SIDE:		index = 3; break;
+
+	case BTN_4:
+	case BTN_EXTRA:		index = 4; break;
+
+	default:		return;
 	}
 
 	if (value) {
@@ -226,19 +257,23 @@
 	}
 }
 
-static void mousedev_notify_readers(struct mousedev *mousedev, struct mousedev_hw_data *packet)
+static void mousedev_notify_readers(struct mousedev *mousedev,
+				    struct mousedev_hw_data *packet)
 {
 	struct mousedev_client *client;
 	struct mousedev_motion *p;
-	unsigned long flags;
+	unsigned int new_head;
 	int wake_readers = 0;
 
-	list_for_each_entry(client, &mousedev->client_list, node) {
-		spin_lock_irqsave(&client->packet_lock, flags);
+	rcu_read_lock();
+	list_for_each_entry_rcu(client, &mousedev->client_list, node) {
+
+		/* Just acquire the lock, interrupts already disabled */
+		spin_lock(&client->packet_lock);
 
 		p = &client->packets[client->head];
 		if (client->ready && p->buttons != mousedev->packet.buttons) {
-			unsigned int new_head = (client->head + 1) % PACKET_QUEUE_LEN;
+			new_head = (client->head + 1) % PACKET_QUEUE_LEN;
 			if (new_head != client->tail) {
 				p = &client->packets[client->head = new_head];
 				memset(p, 0, sizeof(struct mousedev_motion));
@@ -253,25 +288,29 @@
 		}
 
 		client->pos_x += packet->dx;
-		client->pos_x = client->pos_x < 0 ? 0 : (client->pos_x >= xres ? xres : client->pos_x);
+		client->pos_x = client->pos_x < 0 ?
+			0 : (client->pos_x >= xres ? xres : client->pos_x);
 		client->pos_y += packet->dy;
-		client->pos_y = client->pos_y < 0 ? 0 : (client->pos_y >= yres ? yres : client->pos_y);
+		client->pos_y = client->pos_y < 0 ?
+			0 : (client->pos_y >= yres ? yres : client->pos_y);
 
 		p->dx += packet->dx;
 		p->dy += packet->dy;
 		p->dz += packet->dz;
 		p->buttons = mousedev->packet.buttons;
 
-		if (p->dx || p->dy || p->dz || p->buttons != client->last_buttons)
+		if (p->dx || p->dy || p->dz ||
+		    p->buttons != client->last_buttons)
 			client->ready = 1;
 
-		spin_unlock_irqrestore(&client->packet_lock, flags);
+		spin_unlock(&client->packet_lock);
 
 		if (client->ready) {
 			kill_fasync(&client->fasync, SIGIO, POLL_IN);
 			wake_readers = 1;
 		}
 	}
+	rcu_read_unlock();
 
 	if (wake_readers)
 		wake_up_interruptible(&mousedev->wait);
@@ -281,7 +320,8 @@
 {
 	if (!value) {
 		if (mousedev->touch &&
-		    time_before(jiffies, mousedev->touch + msecs_to_jiffies(tap_time))) {
+		    time_before(jiffies,
+				mousedev->touch + msecs_to_jiffies(tap_time))) {
 			/*
 			 * Toggle left button to emulate tap.
 			 * We rely on the fact that mousedev_mix always has 0
@@ -290,7 +330,8 @@
 			set_bit(0, &mousedev->packet.buttons);
 			set_bit(0, &mousedev_mix->packet.buttons);
 			mousedev_notify_readers(mousedev, &mousedev_mix->packet);
-			mousedev_notify_readers(mousedev_mix, &mousedev_mix->packet);
+			mousedev_notify_readers(mousedev_mix,
+						&mousedev_mix->packet);
 			clear_bit(0, &mousedev->packet.buttons);
 			clear_bit(0, &mousedev_mix->packet.buttons);
 		}
@@ -302,54 +343,61 @@
 		mousedev->touch = jiffies;
 }
 
-static void mousedev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+static void mousedev_event(struct input_handle *handle,
+			   unsigned int type, unsigned int code, int value)
 {
 	struct mousedev *mousedev = handle->private;
 
 	switch (type) {
-		case EV_ABS:
-			/* Ignore joysticks */
-			if (test_bit(BTN_TRIGGER, handle->dev->keybit))
-				return;
 
-			if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
-				mousedev_touchpad_event(handle->dev, mousedev, code, value);
+	case EV_ABS:
+		/* Ignore joysticks */
+		if (test_bit(BTN_TRIGGER, handle->dev->keybit))
+			return;
+
+		if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
+			mousedev_touchpad_event(handle->dev,
+						mousedev, code, value);
+		else
+			mousedev_abs_event(handle->dev, mousedev, code, value);
+
+		break;
+
+	case EV_REL:
+		mousedev_rel_event(mousedev, code, value);
+		break;
+
+	case EV_KEY:
+		if (value != 2) {
+			if (code == BTN_TOUCH &&
+			    test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
+				mousedev_touchpad_touch(mousedev, value);
 			else
-				mousedev_abs_event(handle->dev, mousedev, code, value);
+				mousedev_key_event(mousedev, code, value);
+		}
+		break;
 
-			break;
-
-		case EV_REL:
-			mousedev_rel_event(mousedev, code, value);
-			break;
-
-		case EV_KEY:
-			if (value != 2) {
-				if (code == BTN_TOUCH && test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
-					mousedev_touchpad_touch(mousedev, value);
-				else
-					mousedev_key_event(mousedev, code, value);
+	case EV_SYN:
+		if (code == SYN_REPORT) {
+			if (mousedev->touch) {
+				mousedev->pkt_count++;
+				/*
+				 * Input system eats duplicate events,
+				 * but we need all of them to do correct
+				 * averaging so apply present one forward
+				 */
+				fx(0) = fx(1);
+				fy(0) = fy(1);
 			}
-			break;
 
-		case EV_SYN:
-			if (code == SYN_REPORT) {
-				if (mousedev->touch) {
-					mousedev->pkt_count++;
-					/* Input system eats duplicate events, but we need all of them
-					 * to do correct averaging so apply present one forward
-					 */
-					fx(0) = fx(1);
-					fy(0) = fy(1);
-				}
+			mousedev_notify_readers(mousedev, &mousedev->packet);
+			mousedev_notify_readers(mousedev_mix, &mousedev->packet);
 
-				mousedev_notify_readers(mousedev, &mousedev->packet);
-				mousedev_notify_readers(mousedev_mix, &mousedev->packet);
-
-				mousedev->packet.dx = mousedev->packet.dy = mousedev->packet.dz = 0;
-				mousedev->packet.abs_event = 0;
-			}
-			break;
+			mousedev->packet.dx = mousedev->packet.dy =
+				mousedev->packet.dz = 0;
+			mousedev->packet.abs_event = 0;
+		}
+		break;
 	}
 }
 
@@ -367,41 +415,48 @@
 {
 	struct mousedev *mousedev = container_of(dev, struct mousedev, dev);
 
-	mousedev_table[mousedev->minor] = NULL;
 	kfree(mousedev);
 }
 
-static int mixdev_add_device(struct mousedev *mousedev)
+static int mousedev_open_device(struct mousedev *mousedev)
 {
-	int error;
+	int retval;
 
-	if (mousedev_mix->open) {
-		error = input_open_device(&mousedev->handle);
-		if (error)
-			return error;
+	retval = mutex_lock_interruptible(&mousedev->mutex);
+	if (retval)
+		return retval;
 
-		mousedev->open++;
-		mousedev->mixdev_open = 1;
+	if (mousedev->minor == MOUSEDEV_MIX)
+		mixdev_open_devices();
+	else if (!mousedev->exist)
+		retval = -ENODEV;
+	else if (!mousedev->open++) {
+		retval = input_open_device(&mousedev->handle);
+		if (retval)
+			mousedev->open--;
 	}
 
-	get_device(&mousedev->dev);
-	list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list);
-
-	return 0;
+	mutex_unlock(&mousedev->mutex);
+	return retval;
 }
 
-static void mixdev_remove_device(struct mousedev *mousedev)
+static void mousedev_close_device(struct mousedev *mousedev)
 {
-	if (mousedev->mixdev_open) {
-		mousedev->mixdev_open = 0;
-		if (!--mousedev->open && mousedev->exist)
-			input_close_device(&mousedev->handle);
-	}
+	mutex_lock(&mousedev->mutex);
 
-	list_del_init(&mousedev->mixdev_node);
-	put_device(&mousedev->dev);
+	if (mousedev->minor == MOUSEDEV_MIX)
+		mixdev_close_devices();
+	else if (mousedev->exist && !--mousedev->open)
+		input_close_device(&mousedev->handle);
+
+	mutex_unlock(&mousedev->mutex);
 }
 
+/*
+ * Open all available devices so they can all be multiplexed in one.
+ * stream. Note that this function is called with mousedev_mix->mutex
+ * held.
+ */
 static void mixdev_open_devices(void)
 {
 	struct mousedev *mousedev;
@@ -411,16 +466,19 @@
 
 	list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) {
 		if (!mousedev->mixdev_open) {
-			if (!mousedev->open && mousedev->exist)
-				if (input_open_device(&mousedev->handle))
-					continue;
+			if (mousedev_open_device(mousedev))
+				continue;
 
-			mousedev->open++;
 			mousedev->mixdev_open = 1;
 		}
 	}
 }
 
+/*
+ * Close all devices that were opened as part of multiplexed
+ * device. Note that this function is called with mousedev_mix->mutex
+ * held.
+ */
 static void mixdev_close_devices(void)
 {
 	struct mousedev *mousedev;
@@ -431,33 +489,45 @@
 	list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) {
 		if (mousedev->mixdev_open) {
 			mousedev->mixdev_open = 0;
-			if (!--mousedev->open && mousedev->exist)
-				input_close_device(&mousedev->handle);
+			mousedev_close_device(mousedev);
 		}
 	}
 }
 
+
+static void mousedev_attach_client(struct mousedev *mousedev,
+				   struct mousedev_client *client)
+{
+	spin_lock(&mousedev->client_lock);
+	list_add_tail_rcu(&client->node, &mousedev->client_list);
+	spin_unlock(&mousedev->client_lock);
+	synchronize_rcu();
+}
+
+static void mousedev_detach_client(struct mousedev *mousedev,
+				   struct mousedev_client *client)
+{
+	spin_lock(&mousedev->client_lock);
+	list_del_rcu(&client->node);
+	spin_unlock(&mousedev->client_lock);
+	synchronize_rcu();
+}
+
 static int mousedev_release(struct inode *inode, struct file *file)
 {
 	struct mousedev_client *client = file->private_data;
 	struct mousedev *mousedev = client->mousedev;
 
 	mousedev_fasync(-1, file, 0);
-
-	list_del(&client->node);
+	mousedev_detach_client(mousedev, client);
 	kfree(client);
 
-	if (mousedev->minor == MOUSEDEV_MIX)
-		mixdev_close_devices();
-	else if (!--mousedev->open && mousedev->exist)
-		input_close_device(&mousedev->handle);
-
+	mousedev_close_device(mousedev);
 	put_device(&mousedev->dev);
 
 	return 0;
 }
 
-
 static int mousedev_open(struct inode *inode, struct file *file)
 {
 	struct mousedev_client *client;
@@ -475,12 +545,17 @@
 	if (i >= MOUSEDEV_MINORS)
 		return -ENODEV;
 
+	error = mutex_lock_interruptible(&mousedev_table_mutex);
+	if (error)
+		return error;
 	mousedev = mousedev_table[i];
+	if (mousedev)
+		get_device(&mousedev->dev);
+	mutex_unlock(&mousedev_table_mutex);
+
 	if (!mousedev)
 		return -ENODEV;
 
-	get_device(&mousedev->dev);
-
 	client = kzalloc(sizeof(struct mousedev_client), GFP_KERNEL);
 	if (!client) {
 		error = -ENOMEM;
@@ -491,21 +566,17 @@
 	client->pos_x = xres / 2;
 	client->pos_y = yres / 2;
 	client->mousedev = mousedev;
-	list_add_tail(&client->node, &mousedev->client_list);
+	mousedev_attach_client(mousedev, client);
 
-	if (mousedev->minor == MOUSEDEV_MIX)
-		mixdev_open_devices();
-	else if (!mousedev->open++ && mousedev->exist) {
-		error = input_open_device(&mousedev->handle);
-		if (error)
-			goto err_free_client;
-	}
+	error = mousedev_open_device(mousedev);
+	if (error)
+		goto err_free_client;
 
 	file->private_data = client;
 	return 0;
 
  err_free_client:
-	list_del(&client->node);
+	mousedev_detach_client(mousedev, client);
 	kfree(client);
  err_put_mousedev:
 	put_device(&mousedev->dev);
@@ -517,41 +588,41 @@
 	return delta > limit ? limit : (delta < -limit ? -limit : delta);
 }
 
-static void mousedev_packet(struct mousedev_client *client, signed char *ps2_data)
+static void mousedev_packet(struct mousedev_client *client,
+			    signed char *ps2_data)
 {
-	struct mousedev_motion *p;
-	unsigned long flags;
+	struct mousedev_motion *p = &client->packets[client->tail];
 
-	spin_lock_irqsave(&client->packet_lock, flags);
-	p = &client->packets[client->tail];
-
-	ps2_data[0] = 0x08 | ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07);
+	ps2_data[0] = 0x08 |
+		((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07);
 	ps2_data[1] = mousedev_limit_delta(p->dx, 127);
 	ps2_data[2] = mousedev_limit_delta(p->dy, 127);
 	p->dx -= ps2_data[1];
 	p->dy -= ps2_data[2];
 
 	switch (client->mode) {
-		case MOUSEDEV_EMUL_EXPS:
-			ps2_data[3] = mousedev_limit_delta(p->dz, 7);
-			p->dz -= ps2_data[3];
-			ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1);
-			client->bufsiz = 4;
-			break;
+	case MOUSEDEV_EMUL_EXPS:
+		ps2_data[3] = mousedev_limit_delta(p->dz, 7);
+		p->dz -= ps2_data[3];
+		ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1);
+		client->bufsiz = 4;
+		break;
 
-		case MOUSEDEV_EMUL_IMPS:
-			ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
-			ps2_data[3] = mousedev_limit_delta(p->dz, 127);
-			p->dz -= ps2_data[3];
-			client->bufsiz = 4;
-			break;
+	case MOUSEDEV_EMUL_IMPS:
+		ps2_data[0] |=
+			((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
+		ps2_data[3] = mousedev_limit_delta(p->dz, 127);
+		p->dz -= ps2_data[3];
+		client->bufsiz = 4;
+		break;
 
-		case MOUSEDEV_EMUL_PS2:
-		default:
-			ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
-			p->dz = 0;
-			client->bufsiz = 3;
-			break;
+	case MOUSEDEV_EMUL_PS2:
+	default:
+		ps2_data[0] |=
+			((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
+		p->dz = 0;
+		client->bufsiz = 3;
+		break;
 	}
 
 	if (!p->dx && !p->dy && !p->dz) {
@@ -561,12 +632,56 @@
 		} else
 			client->tail = (client->tail + 1) % PACKET_QUEUE_LEN;
 	}
-
-	spin_unlock_irqrestore(&client->packet_lock, flags);
 }
 
+static void mousedev_generate_response(struct mousedev_client *client,
+					int command)
+{
+	client->ps2[0] = 0xfa; /* ACK */
 
-static ssize_t mousedev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+	switch (command) {
+
+	case 0xeb: /* Poll */
+		mousedev_packet(client, &client->ps2[1]);
+		client->bufsiz++; /* account for leading ACK */
+		break;
+
+	case 0xf2: /* Get ID */
+		switch (client->mode) {
+		case MOUSEDEV_EMUL_PS2:
+			client->ps2[1] = 0;
+			break;
+		case MOUSEDEV_EMUL_IMPS:
+			client->ps2[1] = 3;
+			break;
+		case MOUSEDEV_EMUL_EXPS:
+			client->ps2[1] = 4;
+			break;
+		}
+		client->bufsiz = 2;
+		break;
+
+	case 0xe9: /* Get info */
+		client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200;
+		client->bufsiz = 4;
+		break;
+
+	case 0xff: /* Reset */
+		client->impsseq = client->imexseq = 0;
+		client->mode = MOUSEDEV_EMUL_PS2;
+		client->ps2[1] = 0xaa; client->ps2[2] = 0x00;
+		client->bufsiz = 3;
+		break;
+
+	default:
+		client->bufsiz = 1;
+		break;
+	}
+	client->buffer = client->bufsiz;
+}
+
+static ssize_t mousedev_write(struct file *file, const char __user *buffer,
+				size_t count, loff_t *ppos)
 {
 	struct mousedev_client *client = file->private_data;
 	unsigned char c;
@@ -577,6 +692,8 @@
 		if (get_user(c, buffer + i))
 			return -EFAULT;
 
+		spin_lock_irq(&client->packet_lock);
+
 		if (c == mousedev_imex_seq[client->imexseq]) {
 			if (++client->imexseq == MOUSEDEV_SEQ_LEN) {
 				client->imexseq = 0;
@@ -593,68 +710,39 @@
 		} else
 			client->impsseq = 0;
 
-		client->ps2[0] = 0xfa;
+		mousedev_generate_response(client, c);
 
-		switch (c) {
-
-			case 0xeb: /* Poll */
-				mousedev_packet(client, &client->ps2[1]);
-				client->bufsiz++; /* account for leading ACK */
-				break;
-
-			case 0xf2: /* Get ID */
-				switch (client->mode) {
-					case MOUSEDEV_EMUL_PS2:  client->ps2[1] = 0; break;
-					case MOUSEDEV_EMUL_IMPS: client->ps2[1] = 3; break;
-					case MOUSEDEV_EMUL_EXPS: client->ps2[1] = 4; break;
-				}
-				client->bufsiz = 2;
-				break;
-
-			case 0xe9: /* Get info */
-				client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200;
-				client->bufsiz = 4;
-				break;
-
-			case 0xff: /* Reset */
-				client->impsseq = client->imexseq = 0;
-				client->mode = MOUSEDEV_EMUL_PS2;
-				client->ps2[1] = 0xaa; client->ps2[2] = 0x00;
-				client->bufsiz = 3;
-				break;
-
-			default:
-				client->bufsiz = 1;
-				break;
-		}
-
-		client->buffer = client->bufsiz;
+		spin_unlock_irq(&client->packet_lock);
 	}
 
 	kill_fasync(&client->fasync, SIGIO, POLL_IN);
-
 	wake_up_interruptible(&client->mousedev->wait);
 
 	return count;
 }
 
-static ssize_t mousedev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+static ssize_t mousedev_read(struct file *file, char __user *buffer,
+			     size_t count, loff_t *ppos)
 {
 	struct mousedev_client *client = file->private_data;
+	struct mousedev *mousedev = client->mousedev;
+	signed char data[sizeof(client->ps2)];
 	int retval = 0;
 
-	if (!client->ready && !client->buffer && (file->f_flags & O_NONBLOCK))
+	if (!client->ready && !client->buffer && mousedev->exist &&
+	    (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
-	retval = wait_event_interruptible(client->mousedev->wait,
-					  !client->mousedev->exist || client->ready || client->buffer);
-
+	retval = wait_event_interruptible(mousedev->wait,
+			!mousedev->exist || client->ready || client->buffer);
 	if (retval)
 		return retval;
 
-	if (!client->mousedev->exist)
+	if (!mousedev->exist)
 		return -ENODEV;
 
+	spin_lock_irq(&client->packet_lock);
+
 	if (!client->buffer && client->ready) {
 		mousedev_packet(client, client->ps2);
 		client->buffer = client->bufsiz;
@@ -663,9 +751,12 @@
 	if (count > client->buffer)
 		count = client->buffer;
 
+	memcpy(data, client->ps2 + client->bufsiz - client->buffer, count);
 	client->buffer -= count;
 
-	if (copy_to_user(buffer, client->ps2 + client->bufsiz - client->buffer - count, count))
+	spin_unlock_irq(&client->packet_lock);
+
+	if (copy_to_user(buffer, data, count))
 		return -EFAULT;
 
 	return count;
@@ -692,6 +783,60 @@
 	.fasync =	mousedev_fasync,
 };
 
+static int mousedev_install_chrdev(struct mousedev *mousedev)
+{
+	mousedev_table[mousedev->minor] = mousedev;
+	return 0;
+}
+
+static void mousedev_remove_chrdev(struct mousedev *mousedev)
+{
+	mutex_lock(&mousedev_table_mutex);
+	mousedev_table[mousedev->minor] = NULL;
+	mutex_unlock(&mousedev_table_mutex);
+}
+
+/*
+ * Mark device non-existent. This disables writes, ioctls and
+ * prevents new users from opening the device. Already posted
+ * blocking reads will stay, however new ones will fail.
+ */
+static void mousedev_mark_dead(struct mousedev *mousedev)
+{
+	mutex_lock(&mousedev->mutex);
+	mousedev->exist = 0;
+	mutex_unlock(&mousedev->mutex);
+}
+
+/*
+ * Wake up users waiting for IO so they can disconnect from
+ * dead device.
+ */
+static void mousedev_hangup(struct mousedev *mousedev)
+{
+	struct mousedev_client *client;
+
+	spin_lock(&mousedev->client_lock);
+	list_for_each_entry(client, &mousedev->client_list, node)
+		kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+	spin_unlock(&mousedev->client_lock);
+
+	wake_up_interruptible(&mousedev->wait);
+}
+
+static void mousedev_cleanup(struct mousedev *mousedev)
+{
+	struct input_handle *handle = &mousedev->handle;
+
+	mousedev_mark_dead(mousedev);
+	mousedev_hangup(mousedev);
+	mousedev_remove_chrdev(mousedev);
+
+	/* mousedev is marked dead so no one else accesses mousedev->open */
+	if (mousedev->open)
+		input_close_device(handle);
+}
+
 static struct mousedev *mousedev_create(struct input_dev *dev,
 					struct input_handler *handler,
 					int minor)
@@ -707,6 +852,10 @@
 
 	INIT_LIST_HEAD(&mousedev->client_list);
 	INIT_LIST_HEAD(&mousedev->mixdev_node);
+	spin_lock_init(&mousedev->client_lock);
+	mutex_init(&mousedev->mutex);
+	lockdep_set_subclass(&mousedev->mutex,
+			     minor == MOUSEDEV_MIX ? MOUSEDEV_MIX : 0);
 	init_waitqueue_head(&mousedev->wait);
 
 	if (minor == MOUSEDEV_MIX)
@@ -731,14 +880,27 @@
 	mousedev->dev.release = mousedev_free;
 	device_initialize(&mousedev->dev);
 
-	mousedev_table[minor] = mousedev;
+	if (minor != MOUSEDEV_MIX) {
+		error = input_register_handle(&mousedev->handle);
+		if (error)
+			goto err_free_mousedev;
+	}
+
+	error = mousedev_install_chrdev(mousedev);
+	if (error)
+		goto err_unregister_handle;
 
 	error = device_add(&mousedev->dev);
 	if (error)
-		goto err_free_mousedev;
+		goto err_cleanup_mousedev;
 
 	return mousedev;
 
+ err_cleanup_mousedev:
+	mousedev_cleanup(mousedev);
+ err_unregister_handle:
+	if (minor != MOUSEDEV_MIX)
+		input_unregister_handle(&mousedev->handle);
  err_free_mousedev:
 	put_device(&mousedev->dev);
  err_out:
@@ -747,29 +909,64 @@
 
 static void mousedev_destroy(struct mousedev *mousedev)
 {
-	struct mousedev_client *client;
-
 	device_del(&mousedev->dev);
-	mousedev->exist = 0;
+	mousedev_cleanup(mousedev);
+	if (mousedev->minor != MOUSEDEV_MIX)
+		input_unregister_handle(&mousedev->handle);
+	put_device(&mousedev->dev);
+}
 
-	if (mousedev->open) {
-		input_close_device(&mousedev->handle);
-		list_for_each_entry(client, &mousedev->client_list, node)
-			kill_fasync(&client->fasync, SIGIO, POLL_HUP);
-		wake_up_interruptible(&mousedev->wait);
+static int mixdev_add_device(struct mousedev *mousedev)
+{
+	int retval;
+
+	retval = mutex_lock_interruptible(&mousedev_mix->mutex);
+	if (retval)
+		return retval;
+
+	if (mousedev_mix->open) {
+		retval = mousedev_open_device(mousedev);
+		if (retval)
+			goto out;
+
+		mousedev->mixdev_open = 1;
 	}
 
+	get_device(&mousedev->dev);
+	list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list);
+
+ out:
+	mutex_unlock(&mousedev_mix->mutex);
+	return retval;
+}
+
+static void mixdev_remove_device(struct mousedev *mousedev)
+{
+	mutex_lock(&mousedev_mix->mutex);
+
+	if (mousedev->mixdev_open) {
+		mousedev->mixdev_open = 0;
+		mousedev_close_device(mousedev);
+	}
+
+	list_del_init(&mousedev->mixdev_node);
+	mutex_unlock(&mousedev_mix->mutex);
+
 	put_device(&mousedev->dev);
 }
 
-static int mousedev_connect(struct input_handler *handler, struct input_dev *dev,
+static int mousedev_connect(struct input_handler *handler,
+			    struct input_dev *dev,
 			    const struct input_device_id *id)
 {
 	struct mousedev *mousedev;
 	int minor;
 	int error;
 
-	for (minor = 0; minor < MOUSEDEV_MINORS && mousedev_table[minor]; minor++);
+	for (minor = 0; minor < MOUSEDEV_MINORS; minor++)
+		if (!mousedev_table[minor])
+			break;
+
 	if (minor == MOUSEDEV_MINORS) {
 		printk(KERN_ERR "mousedev: no more free mousedev devices\n");
 		return -ENFILE;
@@ -779,21 +976,13 @@
 	if (IS_ERR(mousedev))
 		return PTR_ERR(mousedev);
 
-	error = input_register_handle(&mousedev->handle);
-	if (error)
-		goto err_delete_mousedev;
-
 	error = mixdev_add_device(mousedev);
-	if (error)
-		goto err_unregister_handle;
+	if (error) {
+		mousedev_destroy(mousedev);
+		return error;
+	}
 
 	return 0;
-
- err_unregister_handle:
-	input_unregister_handle(&mousedev->handle);
- err_delete_mousedev:
-	device_unregister(&mousedev->dev);
-	return error;
 }
 
 static void mousedev_disconnect(struct input_handle *handle)
@@ -801,33 +990,42 @@
 	struct mousedev *mousedev = handle->private;
 
 	mixdev_remove_device(mousedev);
-	input_unregister_handle(handle);
 	mousedev_destroy(mousedev);
 }
 
 static const struct input_device_id mousedev_ids[] = {
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_KEYBIT |
+				INPUT_DEVICE_ID_MATCH_RELBIT,
 		.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
 		.keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
 		.relbit = { BIT(REL_X) | BIT(REL_Y) },
-	},	/* A mouse like device, at least one button, two relative axes */
+	},	/* A mouse like device, at least one button,
+		   two relative axes */
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_RELBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_RELBIT,
 		.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
 		.relbit = { BIT(REL_WHEEL) },
 	},	/* A separate scrollwheel */
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_KEYBIT |
+				INPUT_DEVICE_ID_MATCH_ABSBIT,
 		.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
 		.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
 		.absbit = { BIT(ABS_X) | BIT(ABS_Y) },
-	},	/* A tablet like device, at least touch detection, two absolute axes */
+	},	/* A tablet like device, at least touch detection,
+		   two absolute axes */
 	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_KEYBIT |
+				INPUT_DEVICE_ID_MATCH_ABSBIT,
 		.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
 		.keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) },
-		.absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | BIT(ABS_TOOL_WIDTH) },
+		.absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) |
+				BIT(ABS_TOOL_WIDTH) },
 	},	/* A touchpad */
 
 	{ },	/* Terminating entry */
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index c2eea27..11dafc0 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -385,6 +385,8 @@
 	i8042_ctr |= I8042_CTR_KBDINT;
 
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
+		i8042_ctr &= ~I8042_CTR_KBDINT;
+		i8042_ctr |= I8042_CTR_KBDDIS;
 		printk(KERN_ERR "i8042.c: Failed to enable KBD port.\n");
 		return -EIO;
 	}
@@ -402,6 +404,8 @@
 	i8042_ctr |= I8042_CTR_AUXINT;
 
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
+		i8042_ctr &= ~I8042_CTR_AUXINT;
+		i8042_ctr |= I8042_CTR_AUXDIS;
 		printk(KERN_ERR "i8042.c: Failed to enable AUX port.\n");
 		return -EIO;
 	}
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index f929fcd..e3e0baa 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -126,6 +126,16 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called hp680_ts_input.
 
+config TOUCHSCREEN_HP7XX
+	tristate "HP Jornada 710/720/728 touchscreen"
+	depends on SA1100_JORNADA720_SSP
+	help
+	  Say Y here if you have a HP Jornada 710/720/728 and want
+	  to support the built-in touchscreen.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called jornada720_ts.
+
 config TOUCHSCREEN_PENMOUNT
 	tristate "Penmount serial touchscreen"
 	select SERIO
@@ -191,6 +201,7 @@
 	  - Gunze AHL61
 	  - DMC TSC-10/25
 	  - IRTOUCHSYSTEMS/UNITOP
+	  - IdealTEK URTC1000
 
 	  Have a look at <http://linux.chapter7.ch/touchkit/> for
 	  a usage description and the required user-space stuff.
@@ -238,4 +249,14 @@
 	bool "IRTOUCHSYSTEMS/UNITOP device support" if EMBEDDED
 	depends on TOUCHSCREEN_USB_COMPOSITE
 
+config TOUCHSCREEN_USB_IDEALTEK
+	default y
+	bool "IdealTEK URTC1000 device support" if EMBEDDED
+	depends on TOUCHSCREEN_USB_COMPOSITE
+
+config TOUCHSCREEN_USB_GENERAL_TOUCH
+	default y
+	bool "GeneralTouch Touchscreen device support" if EMBEDDED
+	depends on TOUCHSCREEN_USB_COMPOSITE
+
 endif
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 5de8933..35d4097 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -13,6 +13,7 @@
 obj-$(CONFIG_TOUCHSCREEN_MTOUCH)	+= mtouch.o
 obj-$(CONFIG_TOUCHSCREEN_MK712)		+= mk712.o
 obj-$(CONFIG_TOUCHSCREEN_HP600)		+= hp680_ts_input.o
+obj-$(CONFIG_TOUCHSCREEN_HP7XX)		+= jornada720_ts.o
 obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE)	+= usbtouchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_PENMOUNT)	+= penmount.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
diff --git a/drivers/input/touchscreen/jornada720_ts.c b/drivers/input/touchscreen/jornada720_ts.c
new file mode 100644
index 0000000..42a1c9a
--- /dev/null
+++ b/drivers/input/touchscreen/jornada720_ts.c
@@ -0,0 +1,182 @@
+/*
+ * drivers/input/touchscreen/jornada720_ts.c
+ *
+ * Copyright (C) 2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
+ *
+ *  Copyright (C) 2006 Filip Zyzniewski <filip.zyzniewski@tefnet.pl>
+ *  based on HP Jornada 56x touchscreen driver by Alex Lange <chicken@handhelds.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * HP Jornada 710/720/729 Touchscreen Driver
+ */
+
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/hardware.h>
+#include <asm/arch/jornada720.h>
+
+MODULE_AUTHOR("Kristoffer Ericson <kristoffer.ericson@gmail.com>");
+MODULE_DESCRIPTION("HP Jornada 710/720/728 touchscreen driver");
+MODULE_LICENSE("GPLv2");
+
+struct jornada_ts {
+	struct input_dev *dev;
+	int x_data[4];		/* X sample values */
+	int y_data[4];		/* Y sample values */
+};
+
+static void jornada720_ts_collect_data(struct jornada_ts *jornada_ts)
+{
+
+    /* 3 low word X samples */
+    jornada_ts->x_data[0] = jornada_ssp_byte(TXDUMMY);
+    jornada_ts->x_data[1] = jornada_ssp_byte(TXDUMMY);
+    jornada_ts->x_data[2] = jornada_ssp_byte(TXDUMMY);
+
+    /* 3 low word Y samples */
+    jornada_ts->y_data[0] = jornada_ssp_byte(TXDUMMY);
+    jornada_ts->y_data[1] = jornada_ssp_byte(TXDUMMY);
+    jornada_ts->y_data[2] = jornada_ssp_byte(TXDUMMY);
+
+    /* combined x samples bits */
+    jornada_ts->x_data[3] = jornada_ssp_byte(TXDUMMY);
+
+    /* combined y samples bits */
+    jornada_ts->y_data[3] = jornada_ssp_byte(TXDUMMY);
+}
+
+static int jornada720_ts_average(int coords[4])
+{
+	int coord, high_bits = coords[3];
+
+	coord  = coords[0] | ((high_bits & 0x03) << 8);
+	coord += coords[1] | ((high_bits & 0x0c) << 6);
+	coord += coords[2] | ((high_bits & 0x30) << 4);
+
+	return coord / 3;
+}
+
+static irqreturn_t jornada720_ts_interrupt(int irq, void *dev_id)
+{
+	struct platform_device *pdev = dev_id;
+	struct jornada_ts *jornada_ts = platform_get_drvdata(pdev);
+	struct input_dev *input = jornada_ts->dev;
+	int x, y;
+
+	/* If GPIO_GPIO9 is set to high then report pen up */
+	if (GPLR & GPIO_GPIO(9)) {
+		input_report_key(input, BTN_TOUCH, 0);
+		input_sync(input);
+	} else {
+		jornada_ssp_start();
+
+		/* proper reply to request is always TXDUMMY */
+		if (jornada_ssp_inout(GETTOUCHSAMPLES) == TXDUMMY) {
+			jornada720_ts_collect_data(jornada_ts);
+
+			x = jornada720_ts_average(jornada_ts->x_data);
+			y = jornada720_ts_average(jornada_ts->y_data);
+
+			input_report_key(input, BTN_TOUCH, 1);
+			input_report_abs(input, ABS_X, x);
+			input_report_abs(input, ABS_Y, y);
+			input_sync(input);
+		}
+
+		jornada_ssp_end();
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int __devinit jornada720_ts_probe(struct platform_device *pdev)
+{
+	struct jornada_ts *jornada_ts;
+	struct input_dev *input_dev;
+	int error;
+
+	jornada_ts = kzalloc(sizeof(struct jornada_ts), GFP_KERNEL);
+	input_dev = input_allocate_device();
+
+	if (!jornada_ts || !input_dev) {
+		error = -ENOMEM;
+		goto fail1;
+	}
+
+	platform_set_drvdata(pdev, jornada_ts);
+
+	jornada_ts->dev = input_dev;
+
+	input_dev->name = "HP Jornada 7xx Touchscreen";
+	input_dev->phys = "jornadats/input0";
+	input_dev->id.bustype = BUS_HOST;
+	input_dev->dev.parent = &pdev->dev;
+
+	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_set_abs_params(input_dev, ABS_X, 270, 3900, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, 180, 3700, 0, 0);
+
+	error = request_irq(IRQ_GPIO9,
+			jornada720_ts_interrupt,
+			IRQF_DISABLED | IRQF_TRIGGER_RISING,
+			"HP7XX Touchscreen driver", pdev);
+	if (error) {
+		printk(KERN_INFO "HP7XX TS : Unable to acquire irq!\n");
+		goto fail1;
+	}
+
+	error = input_register_device(jornada_ts->dev);
+	if (error)
+		goto fail2;
+
+	return 0;
+
+ fail2:
+	free_irq(IRQ_GPIO9, pdev);
+ fail1:
+	platform_set_drvdata(pdev, NULL);
+	input_free_device(input_dev);
+	kfree(jornada_ts);
+	return error;
+}
+
+static int __devexit jornada720_ts_remove(struct platform_device *pdev)
+{
+	struct jornada_ts *jornada_ts = platform_get_drvdata(pdev);
+
+	free_irq(IRQ_GPIO9, pdev);
+	platform_set_drvdata(pdev, NULL);
+	input_unregister_device(jornada_ts->dev);
+	kfree(jornada_ts);
+
+	return 0;
+}
+
+static struct platform_driver jornada720_ts_driver = {
+	.probe		= jornada720_ts_probe,
+	.remove		= __devexit_p(jornada720_ts_remove),
+	.driver		= {
+		.name	= "jornada_ts",
+	},
+};
+
+static int __init jornada720_ts_init(void)
+{
+	return platform_driver_register(&jornada720_ts_driver);
+}
+
+static void __exit jornada720_ts_exit(void)
+{
+	platform_driver_unregister(&jornada720_ts_driver);
+}
+
+module_init(jornada720_ts_init);
+module_exit(jornada720_ts_exit);
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 36f9440..86aed64 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -130,8 +130,7 @@
 		if (val & UCB_ADC_DAT_VALID)
 			break;
 		/* yield to other processes */
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(1);
+		schedule_timeout_uninterruptible(1);
 	}
 
 	return UCB_ADC_DAT_VALUE(val);
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 741f6c6..9fb3d5c3 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -10,6 +10,7 @@
  *  - Gunze AHL61
  *  - DMC TSC-10/25
  *  - IRTOUCHSYSTEMS/UNITOP
+ *  - IdealTEK URTC1000
  *
  * Copyright (C) 2004-2006 by Daniel Ritz <daniel.ritz@gmx.ch>
  * Copyright (C) by Todd E. Johnson (mtouchusb.c)
@@ -92,7 +93,7 @@
 };
 
 
-#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO)
+#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO) || defined(CONFIG_TOUCHSCREEN_USB_IDEALTEK)
 #define MULTI_PACKET
 #endif
 
@@ -112,6 +113,8 @@
 	DEVTYPE_GUNZE,
 	DEVTYPE_DMC_TSC10,
 	DEVTYPE_IRTOUCH,
+	DEVTYPE_IDEALTEK,
+	DEVTYPE_GENERAL_TOUCH,
 };
 
 static struct usb_device_id usbtouch_devices[] = {
@@ -157,6 +160,14 @@
 	{USB_DEVICE(0x6615, 0x0001), .driver_info = DEVTYPE_IRTOUCH},
 #endif
 
+#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
+	{USB_DEVICE(0x1391, 0x1000), .driver_info = DEVTYPE_IDEALTEK},
+#endif
+
+#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
+	{USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH},
+#endif
+
 	{}
 };
 
@@ -396,7 +407,8 @@
 	                      TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT);
 	if (ret < 0)
 		return ret;
-	if (buf[0] != 0x06 || buf[1] != 0x00)
+	if ((buf[0] != 0x06 || buf[1] != 0x00) &&
+	    (buf[0] != 0x15 || buf[1] != 0x01))
 		return -ENODEV;
 
 	/* start sending data */
@@ -438,6 +450,57 @@
 
 
 /*****************************************************************************
+ * IdealTEK URTC1000 Part
+ */
+#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
+static int idealtek_get_pkt_len(unsigned char *buf, int len)
+{
+	if (buf[0] & 0x80)
+		return 5;
+	if (buf[0] == 0x01)
+		return len;
+	return 0;
+}
+
+static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
+{
+	switch (pkt[0] & 0x98) {
+	case 0x88:
+		/* touch data in IdealTEK mode */
+		dev->x = (pkt[1] << 5) | (pkt[2] >> 2);
+		dev->y = (pkt[3] << 5) | (pkt[4] >> 2);
+		dev->touch = (pkt[0] & 0x40) ? 1 : 0;
+		return 1;
+
+	case 0x98:
+		/* touch data in MT emulation mode */
+		dev->x = (pkt[2] << 5) | (pkt[1] >> 2);
+		dev->y = (pkt[4] << 5) | (pkt[3] >> 2);
+		dev->touch = (pkt[0] & 0x40) ? 1 : 0;
+		return 1;
+
+	default:
+		return 0;
+	}
+}
+#endif
+
+/*****************************************************************************
+ * General Touch Part
+ */
+#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
+static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
+{
+	dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1] ;
+	dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3] ;
+	dev->press = pkt[5] & 0xff;
+	dev->touch = pkt[0] & 0x01;
+
+	return 1;
+}
+#endif
+
+/*****************************************************************************
  * the different device descriptors
  */
 static struct usbtouch_device_info usbtouch_dev_info[] = {
@@ -537,6 +600,32 @@
 		.read_data	= irtouch_read_data,
 	},
 #endif
+
+#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
+	[DEVTYPE_IDEALTEK] = {
+		.min_xc		= 0x0,
+		.max_xc		= 0x0fff,
+		.min_yc		= 0x0,
+		.max_yc		= 0x0fff,
+		.rept_size	= 8,
+		.flags		= USBTOUCH_FLG_BUFFER,
+		.process_pkt	= usbtouch_process_multi,
+		.get_pkt_len	= idealtek_get_pkt_len,
+		.read_data	= idealtek_read_data,
+	},
+#endif
+
+#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
+	[DEVTYPE_GENERAL_TOUCH] = {
+		.min_xc		= 0x0,
+		.max_xc		= 0x0500,
+		.min_yc		= 0x0,
+		.max_yc		= 0x0500,
+		.rept_size	= 7,
+		.read_data	= general_touch_read_data,
+	}
+#endif
+
 };
 
 
diff --git a/drivers/input/tsdev.c b/drivers/input/tsdev.c
deleted file mode 100644
index d2f882e..0000000
--- a/drivers/input/tsdev.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * $Id: tsdev.c,v 1.15 2002/04/10 16:50:19 jsimmons Exp $
- *
- *  Copyright (c) 2001 "Crazy" james Simmons
- *
- *  Compaq touchscreen protocol driver. The protocol emulated by this driver
- *  is obsolete; for new programs use the tslib library which can read directly
- *  from evdev and perform dejittering, variance filtering and calibration -
- *  all in user space, not at kernel level. The meaning of this driver is
- *  to allow usage of newer input drivers with old applications that use the
- *  old /dev/h3600_ts and /dev/h3600_tsraw devices.
- *
- *  09-Apr-2004: Andrew Zabolotny <zap@homelink.ru>
- *      Fixed to actually work, not just output random numbers.
- *      Added support for both h3600_ts and h3600_tsraw protocol
- *      emulation.
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <jsimmons@infradead.org>.
- */
-
-#define TSDEV_MINOR_BASE	128
-#define TSDEV_MINORS		32
-/* First 16 devices are h3600_ts compatible; second 16 are h3600_tsraw */
-#define TSDEV_MINOR_MASK	15
-#define TSDEV_BUFFER_SIZE	64
-
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/input.h>
-#include <linux/major.h>
-#include <linux/random.h>
-#include <linux/time.h>
-#include <linux/device.h>
-
-#ifndef CONFIG_INPUT_TSDEV_SCREEN_X
-#define CONFIG_INPUT_TSDEV_SCREEN_X	240
-#endif
-#ifndef CONFIG_INPUT_TSDEV_SCREEN_Y
-#define CONFIG_INPUT_TSDEV_SCREEN_Y	320
-#endif
-
-/* This driver emulates both protocols of the old h3600_ts and h3600_tsraw
- * devices. The first one must output X/Y data in 'cooked' format, e.g.
- * filtered, dejittered and calibrated. Second device just outputs raw
- * data received from the hardware.
- *
- * This driver doesn't support filtering and dejittering; it supports only
- * calibration. Filtering and dejittering must be done in the low-level
- * driver, if needed, because it may gain additional benefits from knowing
- * the low-level details, the nature of noise and so on.
- *
- * The driver precomputes a calibration matrix given the initial xres and
- * yres values (quite innacurate for most touchscreens) that will result
- * in a more or less expected range of output values. The driver supports
- * the TS_SET_CAL ioctl, which will replace the calibration matrix with a
- * new one, supposedly generated from the values taken from the raw device.
- */
-
-MODULE_AUTHOR("James Simmons <jsimmons@transvirtual.com>");
-MODULE_DESCRIPTION("Input driver to touchscreen converter");
-MODULE_LICENSE("GPL");
-
-static int xres = CONFIG_INPUT_TSDEV_SCREEN_X;
-module_param(xres, uint, 0);
-MODULE_PARM_DESC(xres, "Horizontal screen resolution (can be negative for X-mirror)");
-
-static int yres = CONFIG_INPUT_TSDEV_SCREEN_Y;
-module_param(yres, uint, 0);
-MODULE_PARM_DESC(yres, "Vertical screen resolution (can be negative for Y-mirror)");
-
-/* From Compaq's Touch Screen Specification version 0.2 (draft) */
-struct ts_event {
-	short pressure;
-	short x;
-	short y;
-	short millisecs;
-};
-
-struct ts_calibration {
-	int xscale;
-	int xtrans;
-	int yscale;
-	int ytrans;
-	int xyswap;
-};
-
-struct tsdev {
-	int exist;
-	int open;
-	int minor;
-	char name[8];
-	struct input_handle handle;
-	wait_queue_head_t wait;
-	struct list_head client_list;
-	struct device dev;
-
-	int x, y, pressure;
-	struct ts_calibration cal;
-};
-
-struct tsdev_client {
-	struct fasync_struct *fasync;
-	struct list_head node;
-	struct tsdev *tsdev;
-	int head, tail;
-	struct ts_event event[TSDEV_BUFFER_SIZE];
-	int raw;
-};
-
-/* The following ioctl codes are defined ONLY for backward compatibility.
- * Don't use tsdev for new developement; use the tslib library instead.
- * Touchscreen calibration is a fully userspace task.
- */
-/* Use 'f' as magic number */
-#define IOC_H3600_TS_MAGIC  'f'
-#define TS_GET_CAL	_IOR(IOC_H3600_TS_MAGIC, 10, struct ts_calibration)
-#define TS_SET_CAL	_IOW(IOC_H3600_TS_MAGIC, 11, struct ts_calibration)
-
-static struct tsdev *tsdev_table[TSDEV_MINORS/2];
-
-static int tsdev_fasync(int fd, struct file *file, int on)
-{
-	struct tsdev_client *client = file->private_data;
-	int retval;
-
-	retval = fasync_helper(fd, file, on, &client->fasync);
-	return retval < 0 ? retval : 0;
-}
-
-static int tsdev_open(struct inode *inode, struct file *file)
-{
-	int i = iminor(inode) - TSDEV_MINOR_BASE;
-	struct tsdev_client *client;
-	struct tsdev *tsdev;
-	int error;
-
-	printk(KERN_WARNING "tsdev (compaq touchscreen emulation) is scheduled "
-		"for removal.\nSee Documentation/feature-removal-schedule.txt "
-		"for details.\n");
-
-	if (i >= TSDEV_MINORS)
-		return -ENODEV;
-
-	tsdev = tsdev_table[i & TSDEV_MINOR_MASK];
-	if (!tsdev || !tsdev->exist)
-		return -ENODEV;
-
-	get_device(&tsdev->dev);
-
-	client = kzalloc(sizeof(struct tsdev_client), GFP_KERNEL);
-	if (!client) {
-		error = -ENOMEM;
-		goto err_put_tsdev;
-	}
-
-	client->tsdev = tsdev;
-	client->raw = (i >= TSDEV_MINORS / 2) ? 1 : 0;
-	list_add_tail(&client->node, &tsdev->client_list);
-
-	if (!tsdev->open++ && tsdev->exist) {
-		error = input_open_device(&tsdev->handle);
-		if (error)
-			goto err_free_client;
-	}
-
-	file->private_data = client;
-	return 0;
-
- err_free_client:
-	list_del(&client->node);
-	kfree(client);
- err_put_tsdev:
-	put_device(&tsdev->dev);
-	return error;
-}
-
-static void tsdev_free(struct device *dev)
-{
-	struct tsdev *tsdev = container_of(dev, struct tsdev, dev);
-
-	tsdev_table[tsdev->minor] = NULL;
-	kfree(tsdev);
-}
-
-static int tsdev_release(struct inode *inode, struct file *file)
-{
-	struct tsdev_client *client = file->private_data;
-	struct tsdev *tsdev = client->tsdev;
-
-	tsdev_fasync(-1, file, 0);
-
-	list_del(&client->node);
-	kfree(client);
-
-	if (!--tsdev->open && tsdev->exist)
-		input_close_device(&tsdev->handle);
-
-	put_device(&tsdev->dev);
-
-	return 0;
-}
-
-static ssize_t tsdev_read(struct file *file, char __user *buffer, size_t count,
-			  loff_t *ppos)
-{
-	struct tsdev_client *client = file->private_data;
-	struct tsdev *tsdev = client->tsdev;
-	int retval = 0;
-
-	if (client->head == client->tail && tsdev->exist && (file->f_flags & O_NONBLOCK))
-		return -EAGAIN;
-
-	retval = wait_event_interruptible(tsdev->wait,
-			client->head != client->tail || !tsdev->exist);
-	if (retval)
-		return retval;
-
-	if (!tsdev->exist)
-		return -ENODEV;
-
-	while (client->head != client->tail &&
-	       retval + sizeof (struct ts_event) <= count) {
-		if (copy_to_user (buffer + retval, client->event + client->tail,
-				  sizeof (struct ts_event)))
-			return -EFAULT;
-		client->tail = (client->tail + 1) & (TSDEV_BUFFER_SIZE - 1);
-		retval += sizeof (struct ts_event);
-	}
-
-	return retval;
-}
-
-/* No kernel lock - fine */
-static unsigned int tsdev_poll(struct file *file, poll_table *wait)
-{
-	struct tsdev_client *client = file->private_data;
-	struct tsdev *tsdev = client->tsdev;
-
-	poll_wait(file, &tsdev->wait, wait);
-	return ((client->head == client->tail) ? 0 : (POLLIN | POLLRDNORM)) |
-		(tsdev->exist ? 0 : (POLLHUP | POLLERR));
-}
-
-static int tsdev_ioctl(struct inode *inode, struct file *file,
-		       unsigned int cmd, unsigned long arg)
-{
-	struct tsdev_client *client = file->private_data;
-	struct tsdev *tsdev = client->tsdev;
-	int retval = 0;
-
-	switch (cmd) {
-	case TS_GET_CAL:
-		if (copy_to_user((void __user *)arg, &tsdev->cal,
-				 sizeof (struct ts_calibration)))
-			retval = -EFAULT;
-		break;
-
-	case TS_SET_CAL:
-		if (copy_from_user(&tsdev->cal, (void __user *)arg,
-				   sizeof (struct ts_calibration)))
-			retval = -EFAULT;
-		break;
-
-	default:
-		retval = -EINVAL;
-		break;
-	}
-
-	return retval;
-}
-
-static const struct file_operations tsdev_fops = {
-	.owner =	THIS_MODULE,
-	.open =		tsdev_open,
-	.release =	tsdev_release,
-	.read =		tsdev_read,
-	.poll =		tsdev_poll,
-	.fasync =	tsdev_fasync,
-	.ioctl =	tsdev_ioctl,
-};
-
-static void tsdev_event(struct input_handle *handle, unsigned int type,
-			unsigned int code, int value)
-{
-	struct tsdev *tsdev = handle->private;
-	struct tsdev_client *client;
-	struct timeval time;
-
-	switch (type) {
-	case EV_ABS:
-		switch (code) {
-		case ABS_X:
-			tsdev->x = value;
-			break;
-
-		case ABS_Y:
-			tsdev->y = value;
-			break;
-
-		case ABS_PRESSURE:
-			if (value > handle->dev->absmax[ABS_PRESSURE])
-				value = handle->dev->absmax[ABS_PRESSURE];
-			value -= handle->dev->absmin[ABS_PRESSURE];
-			if (value < 0)
-				value = 0;
-			tsdev->pressure = value;
-			break;
-		}
-		break;
-
-	case EV_REL:
-		switch (code) {
-		case REL_X:
-			tsdev->x += value;
-			if (tsdev->x < 0)
-				tsdev->x = 0;
-			else if (tsdev->x > xres)
-				tsdev->x = xres;
-			break;
-
-		case REL_Y:
-			tsdev->y += value;
-			if (tsdev->y < 0)
-				tsdev->y = 0;
-			else if (tsdev->y > yres)
-				tsdev->y = yres;
-			break;
-		}
-		break;
-
-	case EV_KEY:
-		if (code == BTN_TOUCH || code == BTN_MOUSE) {
-			switch (value) {
-			case 0:
-				tsdev->pressure = 0;
-				break;
-
-			case 1:
-				if (!tsdev->pressure)
-					tsdev->pressure = 1;
-				break;
-			}
-		}
-		break;
-	}
-
-	if (type != EV_SYN || code != SYN_REPORT)
-		return;
-
-	list_for_each_entry(client, &tsdev->client_list, node) {
-		int x, y, tmp;
-
-		do_gettimeofday(&time);
-		client->event[client->head].millisecs = time.tv_usec / 1000;
-		client->event[client->head].pressure = tsdev->pressure;
-
-		x = tsdev->x;
-		y = tsdev->y;
-
-		/* Calibration */
-		if (!client->raw) {
-			x = ((x * tsdev->cal.xscale) >> 8) + tsdev->cal.xtrans;
-			y = ((y * tsdev->cal.yscale) >> 8) + tsdev->cal.ytrans;
-			if (tsdev->cal.xyswap) {
-				tmp = x; x = y; y = tmp;
-			}
-		}
-
-		client->event[client->head].x = x;
-		client->event[client->head].y = y;
-		client->head = (client->head + 1) & (TSDEV_BUFFER_SIZE - 1);
-		kill_fasync(&client->fasync, SIGIO, POLL_IN);
-	}
-	wake_up_interruptible(&tsdev->wait);
-}
-
-static int tsdev_connect(struct input_handler *handler, struct input_dev *dev,
-			 const struct input_device_id *id)
-{
-	struct tsdev *tsdev;
-	int minor, delta;
-	int error;
-
-	for (minor = 0; minor < TSDEV_MINORS / 2 && tsdev_table[minor]; minor++);
-	if (minor >= TSDEV_MINORS / 2) {
-		printk(KERN_ERR
-		       "tsdev: You have way too many touchscreens\n");
-		return -ENFILE;
-	}
-
-	tsdev = kzalloc(sizeof(struct tsdev), GFP_KERNEL);
-	if (!tsdev)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&tsdev->client_list);
-	init_waitqueue_head(&tsdev->wait);
-
-	tsdev->exist = 1;
-	tsdev->minor = minor;
-	tsdev->handle.dev = dev;
-	tsdev->handle.name = tsdev->name;
-	tsdev->handle.handler = handler;
-	tsdev->handle.private = tsdev;
-	snprintf(tsdev->name, sizeof(tsdev->name), "ts%d", minor);
-
-	/* Precompute the rough calibration matrix */
-	delta = dev->absmax [ABS_X] - dev->absmin [ABS_X] + 1;
-	if (delta == 0)
-		delta = 1;
-	tsdev->cal.xscale = (xres << 8) / delta;
-	tsdev->cal.xtrans = - ((dev->absmin [ABS_X] * tsdev->cal.xscale) >> 8);
-
-	delta = dev->absmax [ABS_Y] - dev->absmin [ABS_Y] + 1;
-	if (delta == 0)
-		delta = 1;
-	tsdev->cal.yscale = (yres << 8) / delta;
-	tsdev->cal.ytrans = - ((dev->absmin [ABS_Y] * tsdev->cal.yscale) >> 8);
-
-	snprintf(tsdev->dev.bus_id, sizeof(tsdev->dev.bus_id),
-		 "ts%d", minor);
-	tsdev->dev.class = &input_class;
-	tsdev->dev.parent = &dev->dev;
-	tsdev->dev.devt = MKDEV(INPUT_MAJOR, TSDEV_MINOR_BASE + minor);
-	tsdev->dev.release = tsdev_free;
-	device_initialize(&tsdev->dev);
-
-	tsdev_table[minor] = tsdev;
-
-	error = device_add(&tsdev->dev);
-	if (error)
-		goto err_free_tsdev;
-
-	error = input_register_handle(&tsdev->handle);
-	if (error)
-		goto err_delete_tsdev;
-
-	return 0;
-
- err_delete_tsdev:
-	device_del(&tsdev->dev);
- err_free_tsdev:
-	put_device(&tsdev->dev);
-	return error;
-}
-
-static void tsdev_disconnect(struct input_handle *handle)
-{
-	struct tsdev *tsdev = handle->private;
-	struct tsdev_client *client;
-
-	input_unregister_handle(handle);
-	device_del(&tsdev->dev);
-
-	tsdev->exist = 0;
-
-	if (tsdev->open) {
-		input_close_device(handle);
-		list_for_each_entry(client, &tsdev->client_list, node)
-			kill_fasync(&client->fasync, SIGIO, POLL_HUP);
-		wake_up_interruptible(&tsdev->wait);
-	}
-
-	put_device(&tsdev->dev);
-}
-
-static const struct input_device_id tsdev_ids[] = {
-	{
-	      .flags	= INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT,
-	      .evbit	= { BIT(EV_KEY) | BIT(EV_REL) },
-	      .keybit	= { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
-	      .relbit	= { BIT(REL_X) | BIT(REL_Y) },
-	}, /* A mouse like device, at least one button, two relative axes */
-
-	{
-	      .flags	= INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
-	      .evbit	= { BIT(EV_KEY) | BIT(EV_ABS) },
-	      .keybit	= { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
-	      .absbit	= { BIT(ABS_X) | BIT(ABS_Y) },
-	}, /* A tablet like device, at least touch detection, two absolute axes */
-
-	{
-	      .flags	= INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
-	      .evbit	= { BIT(EV_ABS) },
-	      .absbit	= { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) },
-	}, /* A tablet like device with several gradations of pressure */
-
-	{} /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE(input, tsdev_ids);
-
-static struct input_handler tsdev_handler = {
-	.event =	tsdev_event,
-	.connect =	tsdev_connect,
-	.disconnect =	tsdev_disconnect,
-	.fops =		&tsdev_fops,
-	.minor =	TSDEV_MINOR_BASE,
-	.name =		"tsdev",
-	.id_table =	tsdev_ids,
-};
-
-static int __init tsdev_init(void)
-{
-	return input_register_handler(&tsdev_handler);
-}
-
-static void __exit tsdev_exit(void)
-{
-	input_unregister_handler(&tsdev_handler);
-}
-
-module_init(tsdev_init);
-module_exit(tsdev_exit);
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 7c9cb7e..b39d1f5 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -328,7 +328,7 @@
 			l->cps = (l->transcount * HZ) / (jiffies - last_jiffies);
 		l->transcount = 0;
 		if (dev->net_verbose > 3)
-			printk(KERN_DEBUG "%s: %d bogocps\n", l->name, l->cps);
+			printk(KERN_DEBUG "%s: %d bogocps\n", p->dev->name, l->cps);
 		if ((l->flags & ISDN_NET_CONNECTED) && (!l->dialstate)) {
 			anymore = 1;
 			l->huptimer++;
@@ -350,12 +350,12 @@
 					if (l->hupflags & ISDN_CHARGEHUP) {
 						if (l->hupflags & ISDN_WAITCHARGE) {
 							printk(KERN_DEBUG "isdn_net: Hupflags of %s are %X\n",
-							       l->name, l->hupflags);
+							       p->dev->name, l->hupflags);
 							isdn_net_hangup(p->dev);
 						} else if (time_after(jiffies, l->chargetime + l->chargeint)) {
 							printk(KERN_DEBUG
 							       "isdn_net: %s: chtime = %lu, chint = %d\n",
-							       l->name, l->chargetime, l->chargeint);
+							       p->dev->name, l->chargetime, l->chargeint);
 							isdn_net_hangup(p->dev);
 						}
 					} else
@@ -442,8 +442,8 @@
 #endif
 					isdn_net_lp_disconnected(lp);
 					isdn_all_eaz(lp->isdn_device, lp->isdn_channel);
-					printk(KERN_INFO "%s: remote hangup\n", lp->name);
-					printk(KERN_INFO "%s: Chargesum is %d\n", lp->name,
+					printk(KERN_INFO "%s: remote hangup\n", p->dev->name);
+					printk(KERN_INFO "%s: Chargesum is %d\n", p->dev->name,
 					       lp->charge);
 					isdn_net_unbind_channel(lp);
 					return 1;
@@ -487,7 +487,7 @@
 								isdn_net_add_to_bundle(nd, lp);
 							}
 						}
-						printk(KERN_INFO "isdn_net: %s connected\n", lp->name);
+						printk(KERN_INFO "isdn_net: %s connected\n", p->dev->name);
 						/* If first Chargeinfo comes before B-Channel connect,
 						 * we correct the timestamp here.
 						 */
@@ -534,7 +534,7 @@
 					lp->hupflags |= ISDN_HAVECHARGE;
 				lp->chargetime = jiffies;
 				printk(KERN_DEBUG "isdn_net: Got CINF chargetime of %s now %lu\n",
-				       lp->name, lp->chargetime);
+				       p->dev->name, lp->chargetime);
 				return 1;
 		}
 	}
@@ -565,7 +565,7 @@
 
 #ifdef ISDN_DEBUG_NET_DIAL
 		if (lp->dialstate)
-			printk(KERN_DEBUG "%s: dialstate=%d\n", lp->name, lp->dialstate);
+			printk(KERN_DEBUG "%s: dialstate=%d\n", p->dev->name, lp->dialstate);
 #endif
 		switch (lp->dialstate) {
 			case 0:
@@ -578,7 +578,7 @@
 				lp->dial = lp->phone[1];
 				if (!lp->dial) {
 					printk(KERN_WARNING "%s: phone number deleted?\n",
-					       lp->name);
+					       p->dev->name);
 					isdn_net_hangup(p->dev);
 					break;
 				}
@@ -632,13 +632,13 @@
 				cmd.arg = lp->isdn_channel;
 				if (!lp->dial) {
 					printk(KERN_WARNING "%s: phone number deleted?\n",
-					       lp->name);
+					       p->dev->name);
 					isdn_net_hangup(p->dev);
 					break;
 				}
 				if (!strncmp(lp->dial->num, "LEASED", strlen("LEASED"))) {
 					lp->dialstate = 4;
-					printk(KERN_INFO "%s: Open leased line ...\n", lp->name);
+					printk(KERN_INFO "%s: Open leased line ...\n", p->dev->name);
 				} else {
 					if(lp->dialtimeout > 0)
 						if (time_after(jiffies, lp->dialstarted + lp->dialtimeout)) {
@@ -688,7 +688,7 @@
 						dev->usage[i] |= ISDN_USAGE_OUTGOING;
 						isdn_info_update();
 					}
-					printk(KERN_INFO "%s: dialing %d %s... %s\n", lp->name,
+					printk(KERN_INFO "%s: dialing %d %s... %s\n", p->dev->name,
 					       lp->dialretry, cmd.parm.setup.phone,
 					       (cmd.parm.setup.si1 == 1) ? "DOV" : "");
 					lp->dtimer = 0;
@@ -797,7 +797,7 @@
 				 */
 				if (lp->dtimer++ > lp->cbdelay)
 				{
-					printk(KERN_INFO "%s: hangup waiting for callback ...\n", lp->name);
+					printk(KERN_INFO "%s: hangup waiting for callback ...\n", p->dev->name);
 					lp->dtimer = 0;
 					lp->dialstate = 4;
 					cmd.driver = lp->isdn_device;
@@ -810,7 +810,7 @@
 				break;
 			default:
 				printk(KERN_WARNING "isdn_net: Illegal dialstate %d for device %s\n",
-				       lp->dialstate, lp->name);
+				       lp->dialstate, p->dev->name);
 		}
 		p = (isdn_net_dev *) p->next;
 	}
@@ -836,11 +836,11 @@
 			if (slp->flags & ISDN_NET_CONNECTED) {
 				printk(KERN_INFO
 					"isdn_net: hang up slave %s before %s\n",
-					slp->name, lp->name);
+					lp->slave->name, d->name);
 				isdn_net_hangup(lp->slave);
 			}
 		}
-		printk(KERN_INFO "isdn_net: local hangup %s\n", lp->name);
+		printk(KERN_INFO "isdn_net: local hangup %s\n", d->name);
 #ifdef CONFIG_ISDN_PPP
 		if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP)
 			isdn_ppp_free(lp);
@@ -858,7 +858,7 @@
 		cmd.command = ISDN_CMD_HANGUP;
 		cmd.arg = lp->isdn_channel;
 		isdn_command(&cmd);
-		printk(KERN_INFO "%s: Chargesum is %d\n", lp->name, lp->charge);
+		printk(KERN_INFO "%s: Chargesum is %d\n", d->name, lp->charge);
 		isdn_all_eaz(lp->isdn_device, lp->isdn_channel);
 	}
 	isdn_net_unbind_channel(lp);
@@ -885,7 +885,7 @@
 		/* fall back to old isdn_net_log_packet method() */
 		char * buf = skb->data;
 
-		printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->name);
+		printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->netdev->dev->name);
 		p = buf;
 		proto = ETH_P_IP;
 		switch (lp->p_encap) {
@@ -1023,7 +1023,7 @@
 	ret = isdn_writebuf_skb_stub(lp->isdn_device, lp->isdn_channel, 1, skb);
 	if (ret != len) {
 		/* we should never get here */
-		printk(KERN_WARNING "%s: HL driver queue full\n", lp->name);
+		printk(KERN_WARNING "%s: HL driver queue full\n", lp->netdev->dev->name);
 		goto error;
 	}
 	
@@ -1461,7 +1461,7 @@
 				mod_timer(&lp->cisco_timer, expires);
 				printk(KERN_INFO "%s: Keepalive period set "
 					"to %d seconds.\n",
-					lp->name, lp->cisco_keepalive_period);
+					dev->name, lp->cisco_keepalive_period);
 			}
 			break;
 
@@ -1512,7 +1512,7 @@
 		lp->cisco_line_state = 0;
 		printk (KERN_WARNING
 				"UPDOWN: Line protocol on Interface %s,"
-				" changed state to down\n", lp->name);
+				" changed state to down\n", lp->netdev->dev->name);
 		/* should stop routing higher-level data accross */
 	} else if ((!lp->cisco_line_state) &&
 		(myseq_diff >= 0) && (myseq_diff <= 2)) {
@@ -1520,14 +1520,14 @@
 		lp->cisco_line_state = 1;
 		printk (KERN_WARNING
 				"UPDOWN: Line protocol on Interface %s,"
-				" changed state to up\n", lp->name);
+				" changed state to up\n", lp->netdev->dev->name);
 		/* restart routing higher-level data accross */
 	}
 
 	if (lp->cisco_debserint)
 		printk (KERN_DEBUG "%s: HDLC "
 			"myseq %lu, mineseen %lu%c, yourseen %lu, %s\n",
-			lp->name, last_cisco_myseq, lp->cisco_mineseen,
+			lp->netdev->dev->name, last_cisco_myseq, lp->cisco_mineseen,
 			((last_cisco_myseq == lp->cisco_mineseen) ? '*' : 040),
 			lp->cisco_yourseq,
 			((lp->cisco_line_state) ? "line up" : "line down"));
@@ -1682,7 +1682,7 @@
 			"remote ip: %d.%d.%d.%d, "
 			"local ip: %d.%d.%d.%d "
 			"mask: %d.%d.%d.%d\n",
-		       lp->name,
+		       lp->netdev->dev->name,
 		       HIPQUAD(addr),
 		       HIPQUAD(local),
 		       HIPQUAD(mask));
@@ -1690,7 +1690,7 @@
   slarp_reply_out:
 		 printk(KERN_INFO "%s: got invalid slarp "
 				 "reply (%d.%d.%d.%d/%d.%d.%d.%d) "
-				 "- ignored\n", lp->name,
+				 "- ignored\n", lp->netdev->dev->name,
 				 HIPQUAD(addr), HIPQUAD(mask));
 		break;
 	case CISCO_SLARP_KEEPALIVE:
@@ -1701,7 +1701,8 @@
 				lp->cisco_last_slarp_in) {
 			printk(KERN_DEBUG "%s: Keepalive period mismatch - "
 				"is %d but should be %d.\n",
-				lp->name, period, lp->cisco_keepalive_period);
+				lp->netdev->dev->name, period,
+				lp->cisco_keepalive_period);
 		}
 		lp->cisco_last_slarp_in = jiffies;
 		p += get_u32(p, &my_seq);
@@ -1732,12 +1733,12 @@
 	
 	if (addr != CISCO_ADDR_UNICAST && addr != CISCO_ADDR_BROADCAST) {
 		printk(KERN_WARNING "%s: Unknown Cisco addr 0x%02x\n",
-		       lp->name, addr);
+		       lp->netdev->dev->name, addr);
 		goto out_free;
 	}
 	if (ctrl != CISCO_CTRL) {
 		printk(KERN_WARNING "%s: Unknown Cisco ctrl 0x%02x\n",
-		       lp->name, ctrl);
+		       lp->netdev->dev->name, ctrl);
 		goto out_free;
 	}
 
@@ -1748,7 +1749,8 @@
 	case CISCO_TYPE_CDP:
 		if (lp->cisco_debserint)
 			printk(KERN_DEBUG "%s: Received CDP packet. use "
-				"\"no cdp enable\" on cisco.\n", lp->name);
+				"\"no cdp enable\" on cisco.\n",
+				lp->netdev->dev->name);
 		goto out_free;
 	default:
 		/* no special cisco protocol */
@@ -1843,7 +1845,7 @@
 				};
 #endif /* CONFIG_ISDN_X25 */
 			printk(KERN_WARNING "%s: unknown encapsulation, dropping\n",
-			       lp->name);
+			       lp->netdev->dev->name);
 			kfree_skb(skb);
 			return;
 	}
@@ -2174,7 +2176,7 @@
 			wret = matchret;
 #ifdef ISDN_DEBUG_NET_ICALL
 		printk(KERN_DEBUG "n_fi: if='%s', l.msn=%s, l.flags=%d, l.dstate=%d\n",
-		       lp->name, lp->msn, lp->flags, lp->dialstate);
+		       p->dev->name, lp->msn, lp->flags, lp->dialstate);
 #endif
 		if ((!matchret) &&                                        /* EAZ is matching   */
 		    (((!(lp->flags & ISDN_NET_CONNECTED)) &&              /* but not connected */
@@ -2277,7 +2279,7 @@
 				 * */
 				if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) {
 					printk(KERN_INFO "incoming call, interface %s `stopped' -> rejected\n",
-					       lp->name);
+					       p->dev->name);
 					return 3;
 				}
 				/*
@@ -2286,7 +2288,7 @@
 				 */
 				if (!isdn_net_device_started(p)) {
 					printk(KERN_INFO "%s: incoming call, interface down -> rejected\n",
-					       lp->name);
+					       p->dev->name);
 					return 3;
 				}
 				/* Interface is up, now see if it's a slave. If so, see if
@@ -2294,8 +2296,8 @@
 				 */
 				if (lp->master) {
 					isdn_net_local *mlp = (isdn_net_local *) lp->master->priv;
-					printk(KERN_DEBUG "ICALLslv: %s\n", lp->name);
-					printk(KERN_DEBUG "master=%s\n", mlp->name);
+					printk(KERN_DEBUG "ICALLslv: %s\n", p->dev->name);
+					printk(KERN_DEBUG "master=%s\n", lp->master->name);
 					if (mlp->flags & ISDN_NET_CONNECTED) {
 						printk(KERN_DEBUG "master online\n");
 						/* Master is online, find parent-slave (master if first slave) */
@@ -2322,11 +2324,11 @@
 					 * */
 					if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) {
 						printk(KERN_INFO "incoming call for callback, interface %s `off' -> rejected\n",
-						       lp->name);
+						       p->dev->name);
 						return 3;
 					}
 					printk(KERN_DEBUG "%s: call from %s -> %s, start callback\n",
-					       lp->name, nr, eaz);
+					       p->dev->name, nr, eaz);
 					if (lp->phone[1]) {
 						/* Grab a free ISDN-Channel */
 						spin_lock_irqsave(&dev->lock, flags);
@@ -2340,7 +2342,8 @@
 						 		lp->msn)
 								) < 0) {
 
-							printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", lp->name);
+							printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n",
+								p->dev->name);
 							spin_unlock_irqrestore(&dev->lock, flags);
 							return 0;
 						}
@@ -2361,11 +2364,12 @@
 						/* Initiate dialing by returning 2 or 4 */
 						return (lp->flags & ISDN_NET_CBHUP) ? 2 : 4;
 					} else
-						printk(KERN_WARNING "isdn_net: %s: No phone number\n", lp->name);
+						printk(KERN_WARNING "isdn_net: %s: No phone number\n",
+							p->dev->name);
 					return 0;
 				} else {
-					printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", lp->name, nr,
-					       eaz);
+					printk(KERN_DEBUG "%s: call from %s -> %s accepted\n",
+						p->dev->name, nr, eaz);
 					/* if this interface is dialing, it does it probably on a different
 					   device, so free this device */
 					if ((lp->dialstate == 4) || (lp->dialstate == 12)) {
@@ -2424,7 +2428,7 @@
 	isdn_net_dev *p = dev->netdev;
 
 	while (p) {
-		if (!strcmp(p->local->name, name))
+		if (!strcmp(p->dev->name, name))
 			return p;
 		p = (isdn_net_dev *) p->next;
 	}
@@ -2453,7 +2457,8 @@
 					lp->pre_device,
 					lp->pre_channel,
 					lp->msn)) < 0) {
-				printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", lp->name);
+				printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n",
+					lp->netdev->dev->name);
 				spin_unlock_irqrestore(&dev->lock, flags);
 				return -EAGAIN;
 			}
@@ -2556,7 +2561,7 @@
 		return NULL;
 	}
 	if (name == NULL)
-		name = "         ";
+		return NULL;
 	if (!(netdev = kzalloc(sizeof(isdn_net_dev), GFP_KERNEL))) {
 		printk(KERN_WARNING "isdn_net: Could not allocate net-device\n");
 		return NULL;
@@ -2568,7 +2573,6 @@
 		return NULL;
 	}
 	netdev->local = netdev->dev->priv;
-	strcpy(netdev->local->name, netdev->dev->name);
 	netdev->dev->init = isdn_net_init;
 	if (master) {
 		/* Device shall be a slave */
@@ -2673,7 +2677,7 @@
 #endif
 			if (isdn_net_device_started(p)) {
 				printk(KERN_WARNING "%s: cannot change encap when if is up\n",
-				       lp->name);
+				       p->dev->name);
 				return -EBUSY;
 			}
 #ifdef CONFIG_ISDN_X25
@@ -2698,7 +2702,7 @@
 		case ISDN_NET_ENCAP_SYNCPPP:
 #ifndef CONFIG_ISDN_PPP
 			printk(KERN_WARNING "%s: SyncPPP support not configured\n",
-			       lp->name);
+			       p->dev->name);
 			return -EINVAL;
 #else
 			p->dev->type = ARPHRD_PPP;	/* change ARP type */
@@ -2709,7 +2713,7 @@
 		case ISDN_NET_ENCAP_X25IFACE:
 #ifndef CONFIG_ISDN_X25
 			printk(KERN_WARNING "%s: isdn-x25 support not configured\n",
-			       p->local->name);
+			       p->dev->name);
 			return -EINVAL;
 #else
 			p->dev->type = ARPHRD_X25;	/* change ARP type */
@@ -2725,7 +2729,7 @@
 				break;
 			printk(KERN_WARNING
 			       "%s: encapsulation protocol %d not supported\n",
-			       p->local->name, cfg->p_encap);
+			       p->dev->name, cfg->p_encap);
 			return -EINVAL;
 		}
 		if (strlen(cfg->drvid)) {
@@ -2902,13 +2906,18 @@
 		cfg->pppbind = lp->pppbind;
 		cfg->dialtimeout = lp->dialtimeout >= 0 ? lp->dialtimeout / HZ : -1;
 		cfg->dialwait = lp->dialwait / HZ;
-		if (lp->slave)
-			strcpy(cfg->slave, ((isdn_net_local *) lp->slave->priv)->name);
-		else
+		if (lp->slave) {
+			if (strlen(lp->slave->name) > 8)
+				strcpy(cfg->slave, "too-long");
+			else
+				strcpy(cfg->slave, lp->slave->name);
+		} else
 			cfg->slave[0] = '\0';
-		if (lp->master)
-			strcpy(cfg->master, ((isdn_net_local *) lp->master->priv)->name);
-		else
+		if (lp->master) {
+			if (strlen(lp->master->name) > 8)
+				strcpy(cfg->master, "too-long");
+			strcpy(cfg->master, lp->master->name);
+		} else
 			cfg->master[0] = '\0';
 		return 0;
 	}
@@ -2978,7 +2987,8 @@
 	isdn_net_dev *p = isdn_net_findif(phone->name);
 	int ch, dv, idx;
 
-	if (!p) return -ENODEV;
+	if (!p)
+		return -ENODEV;
 	/*
 	 * Theoretical race: while this executes, the remote number might
 	 * become invalid (hang up) or change (new connection), resulting
@@ -2987,14 +2997,18 @@
 	 */
 	ch = p->local->isdn_channel;
 	dv = p->local->isdn_device;
-	if(ch<0 && dv<0) return -ENOTCONN;
+	if(ch < 0 && dv < 0)
+		return -ENOTCONN;
 	idx = isdn_dc2minor(dv, ch);
-	if (idx<0) return -ENODEV;
+	if (idx <0 )
+		return -ENODEV;
 	/* for pre-bound channels, we need this extra check */
-	if ( strncmp(dev->num[idx],"???",3) == 0 ) return -ENOTCONN;
-	strncpy(phone->phone,dev->num[idx],ISDN_MSNLEN);
-	phone->outgoing=USG_OUTGOING(dev->usage[idx]);
-	if ( copy_to_user(peer,phone,sizeof(*peer)) ) return -EFAULT;
+	if (strncmp(dev->num[idx], "???", 3) == 0)
+		return -ENOTCONN;
+	strncpy(phone->phone, dev->num[idx], ISDN_MSNLEN);
+	phone->outgoing = USG_OUTGOING(dev->usage[idx]);
+	if (copy_to_user(peer, phone, sizeof(*peer)))
+		return -EFAULT;
 	return 0;
 }
 /*
@@ -3113,18 +3127,18 @@
 		dev->netdev = p->next;
 	if (p->local->slave) {
 		/* If this interface has a slave, remove it also */
-		char *slavename = ((isdn_net_local *) (p->local->slave->priv))->name;
+		char *slavename = p->local->slave->name;
 		isdn_net_dev *n = dev->netdev;
 		q = NULL;
 		while (n) {
-			if (!strcmp(n->local->name, slavename)) {
+			if (!strcmp(n->dev->name, slavename)) {
 				spin_unlock_irqrestore(&dev->lock, flags);
 				isdn_net_realrm(n, q);
 				spin_lock_irqsave(&dev->lock, flags);
 				break;
 			}
 			q = n;
-			n = (isdn_net_dev *) n->next;
+			n = (isdn_net_dev *)n->next;
 		}
 	}
 	spin_unlock_irqrestore(&dev->lock, flags);
@@ -3152,7 +3166,7 @@
 	p = dev->netdev;
 	q = NULL;
 	while (p) {
-		if (!strcmp(p->local->name, name)) {
+		if (!strcmp(p->dev->name, name)) {
 			spin_unlock_irqrestore(&dev->lock, flags);
 			return (isdn_net_realrm(p, q));
 		}
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 0e5e59f..9f5fe37 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -190,9 +190,11 @@
 		retval = -1;
 		goto out;
 	}
-	unit = isdn_ppp_if_get_unit(lp->name);	/* get unit number from interface name .. ugly! */
+	/* get unit number from interface name .. ugly! */
+	unit = isdn_ppp_if_get_unit(lp->netdev->dev->name);
 	if (unit < 0) {
-		printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", lp->name);
+		printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n",
+			lp->netdev->dev->name);
 		retval = -1;
 		goto out;
 	}
@@ -507,7 +509,8 @@
 		case PPPIOCGIFNAME:
 			if(!lp)
 				return -EINVAL;
-			if ((r = set_arg(argp, lp->name, strlen(lp->name))))
+			if ((r = set_arg(argp, lp->netdev->dev->name,
+				strlen(lp->netdev->dev->name))))
 				return r;
 			break;
 		case PPPIOCGMPFLAGS:	/* get configuration flags */
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 56cd899..77f50b6 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -172,6 +172,7 @@
 
 config MAC_EMUMOUSEBTN
 	bool "Support for mouse button 2+3 emulation"
+	select INPUT
 	help
 	  This provides generic support for emulating the 2nd and 3rd mouse
 	  button with keypresses.  If you say Y here, the emulation is still
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index 48d17bf..8cce016 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -52,6 +52,11 @@
 
 MODULE_AUTHOR("Franz Sirl <Franz.Sirl-kernel@lauterbach.com>");
 
+static int restore_capslock_events;
+module_param(restore_capslock_events, int, 0644);
+MODULE_PARM_DESC(restore_capslock_events,
+	"Produce keypress events for capslock on both keyup and keydown.");
+
 #define KEYB_KEYREG	0	/* register # for key up/down data */
 #define KEYB_LEDREG	2	/* register # for leds on ADB keyboard */
 #define MOUSE_DATAREG	0	/* reg# for movement/button codes from mouse */
@@ -217,6 +222,8 @@
 #define FLAG_FN_KEY_PRESSED	0x00000001
 #define FLAG_POWER_FROM_FN	0x00000002
 #define FLAG_EMU_FWDEL_DOWN	0x00000004
+#define FLAG_CAPSLOCK_TRANSLATE	0x00000008
+#define FLAG_CAPSLOCK_DOWN	0x00000010
 
 static struct adbhid *adbhid[16];
 
@@ -272,19 +279,50 @@
 }
 
 static void
-adbhid_input_keycode(int id, int keycode, int repeat)
+adbhid_input_keycode(int id, int scancode, int repeat)
 {
 	struct adbhid *ahid = adbhid[id];
-	int up_flag, key;
+	int keycode, up_flag;
 
-	up_flag = (keycode & 0x80);
-	keycode &= 0x7f;
+	keycode = scancode & 0x7f;
+	up_flag = scancode & 0x80;
+
+	if (restore_capslock_events) {
+		if (keycode == ADB_KEY_CAPSLOCK && !up_flag) {
+			/* Key pressed, turning on the CapsLock LED.
+			 * The next 0xff will be interpreted as a release. */
+			ahid->flags |= FLAG_CAPSLOCK_TRANSLATE
+					| FLAG_CAPSLOCK_DOWN;
+		} else if (scancode == 0xff) {
+			/* Scancode 0xff usually signifies that the capslock
+			 * key was either pressed or released. */
+			if (ahid->flags & FLAG_CAPSLOCK_TRANSLATE) {
+				keycode = ADB_KEY_CAPSLOCK;
+				if (ahid->flags & FLAG_CAPSLOCK_DOWN) {
+					/* Key released */
+					up_flag = 1;
+					ahid->flags &= ~FLAG_CAPSLOCK_DOWN;
+				} else {
+					/* Key pressed */
+					up_flag = 0;
+					ahid->flags &= ~FLAG_CAPSLOCK_TRANSLATE;
+				}
+			} else {
+				printk(KERN_INFO "Spurious caps lock event "
+						"(scancode 0xff).");
+			}
+		}
+	}
 
 	switch (keycode) {
-	case ADB_KEY_CAPSLOCK: /* Generate down/up events for CapsLock everytime. */
-		input_report_key(ahid->input, KEY_CAPSLOCK, 1);
-		input_report_key(ahid->input, KEY_CAPSLOCK, 0);
-		input_sync(ahid->input);
+	case ADB_KEY_CAPSLOCK:
+		if (!restore_capslock_events) {
+			/* Generate down/up events for CapsLock everytime. */
+			input_report_key(ahid->input, KEY_CAPSLOCK, 1);
+			input_sync(ahid->input);
+			input_report_key(ahid->input, KEY_CAPSLOCK, 0);
+			input_sync(ahid->input);
+		}
 		return;
 #ifdef CONFIG_PPC_PMAC
 	case ADB_KEY_POWER_OLD: /* Power key on PBook 3400 needs remapping */
@@ -296,7 +334,7 @@
 			keycode = ADB_KEY_POWER;
 		}
 		break;
-	case ADB_KEY_POWER: 
+	case ADB_KEY_POWER:
 		/* Fn + Command will produce a bogus "power" keycode */
 		if (ahid->flags & FLAG_FN_KEY_PRESSED) {
 			keycode = ADB_KEY_CMD;
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 216948d..81e068f 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -945,15 +945,15 @@
 		KEY_UNKNOWN,	/* 0x0C: FN+BACKSPACE */
 		KEY_UNKNOWN,	/* 0x0D: FN+INSERT */
 		KEY_UNKNOWN,	/* 0x0E: FN+DELETE */
-		KEY_RESERVED,	/* 0x0F: FN+HOME (brightness up) */
+		KEY_BRIGHTNESSUP,	/* 0x0F: FN+HOME (brightness up) */
 		/* Scan codes 0x10 to 0x1F: Extended ACPI HKEY hot keys */
-		KEY_RESERVED,	/* 0x10: FN+END (brightness down) */
+		KEY_BRIGHTNESSDOWN,	/* 0x10: FN+END (brightness down) */
 		KEY_RESERVED,	/* 0x11: FN+PGUP (thinklight toggle) */
 		KEY_UNKNOWN,	/* 0x12: FN+PGDOWN */
 		KEY_ZOOM,	/* 0x13: FN+SPACE (zoom) */
-		KEY_RESERVED,	/* 0x14: VOLUME UP */
-		KEY_RESERVED,	/* 0x15: VOLUME DOWN */
-		KEY_RESERVED,	/* 0x16: MUTE */
+		KEY_VOLUMEUP,	/* 0x14: VOLUME UP */
+		KEY_VOLUMEDOWN,	/* 0x15: VOLUME DOWN */
+		KEY_MUTE,	/* 0x16: MUTE */
 		KEY_VENDOR,	/* 0x17: Thinkpad/AccessIBM/Lenovo */
 		/* (assignments unknown, please report if found) */
 		KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
@@ -974,9 +974,9 @@
 		KEY_RESERVED,	/* 0x11: FN+PGUP (thinklight toggle) */
 		KEY_UNKNOWN,	/* 0x12: FN+PGDOWN */
 		KEY_ZOOM,	/* 0x13: FN+SPACE (zoom) */
-		KEY_RESERVED,	/* 0x14: VOLUME UP */
-		KEY_RESERVED,	/* 0x15: VOLUME DOWN */
-		KEY_RESERVED,	/* 0x16: MUTE */
+		KEY_VOLUMEUP,	/* 0x14: VOLUME UP */
+		KEY_VOLUMEDOWN,	/* 0x15: VOLUME DOWN */
+		KEY_MUTE,	/* 0x16: MUTE */
 		KEY_VENDOR,	/* 0x17: Thinkpad/AccessIBM/Lenovo */
 		/* (assignments unknown, please report if found) */
 		KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index cb933ac..8211329 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -14,20 +14,20 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/io.h>
 #include <asm/hardware.h>
+#include <asm/cacheflush.h>
 
 #include <asm/mach/flash.h>
 
 static void pxa2xx_map_inval_cache(struct map_info *map, unsigned long from,
 				      ssize_t len)
 {
-	consistent_sync((char *)map->cached + from, len, DMA_FROM_DEVICE);
+	flush_ioremap_region(map->phys, map->cached, from, len);
 }
 
 struct pxa2xx_flash_info {
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9c635a2..8f99a06 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1780,6 +1780,15 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called sc92031.  This is recommended.
 
+config CPMAC
+	tristate "TI AR7 CPMAC Ethernet support (EXPERIMENTAL)"
+	depends on NET_ETHERNET && EXPERIMENTAL && AR7
+	select PHYLIB
+	select FIXED_PHY
+	select FIXED_MII_100_FDX
+	help
+	  TI AR7 CPMAC Ethernet support
+
 config NET_POCKET
 	bool "Pocket and portable adapters"
 	depends on PARPORT
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index d2e0f35..22f78cb 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -159,6 +159,7 @@
 obj-$(CONFIG_8139TOO) += 8139too.o
 obj-$(CONFIG_ZNET) += znet.o
 obj-$(CONFIG_LAN_SAA9730) += saa9730.o
+obj-$(CONFIG_CPMAC) += cpmac.o
 obj-$(CONFIG_DEPCA) += depca.o
 obj-$(CONFIG_EWRK3) += ewrk3.o
 obj-$(CONFIG_ATP) += atp.o
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index b46c5d8..185f98e 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -54,13 +54,16 @@
 #include <linux/delay.h>
 #include <linux/crc32.h>
 #include <linux/phy.h>
+
+#include <asm/cpu.h>
 #include <asm/mipsregs.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 
-#include <asm/mach-au1x00/au1000.h>
-#include <asm/cpu.h>
+#include <au1000.h>
+#include <prom.h>
+
 #include "au1000_eth.h"
 
 #ifdef AU1000_ETH_DEBUG
@@ -96,11 +99,6 @@
 static void au1000_adjust_link(struct net_device *);
 static void enable_mac(struct net_device *, int);
 
-// externs
-extern int get_ethernet_addr(char *ethernet_addr);
-extern void str2eaddr(unsigned char *ea, unsigned char *str);
-extern char * prom_getcmdline(void);
-
 /*
  * Theory of operation
  *
@@ -619,7 +617,6 @@
 	struct au1000_private *aup = NULL;
 	struct net_device *dev = NULL;
 	db_dest_t *pDB, *pDBfree;
-	char *pmac, *argptr;
 	char ethaddr[6];
 	int irq, i, err;
 	u32 base, macen;
@@ -677,21 +674,12 @@
 	au_macs[port_num] = aup;
 
 	if (port_num == 0) {
-		/* Check the environment variables first */
-		if (get_ethernet_addr(ethaddr) == 0)
+		if (prom_get_ethernet_addr(ethaddr) == 0)
 			memcpy(au1000_mac_addr, ethaddr, sizeof(au1000_mac_addr));
 		else {
-			/* Check command line */
-			argptr = prom_getcmdline();
-			if ((pmac = strstr(argptr, "ethaddr=")) == NULL)
-				printk(KERN_INFO "%s: No MAC address found\n",
-						 dev->name);
+			printk(KERN_INFO "%s: No MAC address found\n",
+					 dev->name);
 				/* Use the hard coded MAC addresses */
-			else {
-				str2eaddr(ethaddr, pmac + strlen("ethaddr="));
-				memcpy(au1000_mac_addr, ethaddr,
-				       sizeof(au1000_mac_addr));
-			}
 		}
 
 		setup_hw_rings(aup, MAC0_RX_DMA_ADDR, MAC0_TX_DMA_ADDR);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 64bfec3..db80f24 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -98,6 +98,7 @@
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
 static char *arp_validate = NULL;
+static int fail_over_mac = 0;
 struct bond_params bonding_defaults;
 
 module_param(max_bonds, int, 0);
@@ -131,6 +132,8 @@
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
 module_param(arp_validate, charp, 0);
 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
+module_param(fail_over_mac, int, 0);
+MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  0 of off (default), 1 for on.");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -1096,7 +1099,21 @@
 		if (new_active) {
 			bond_set_slave_active_flags(new_active);
 		}
-		bond_send_gratuitous_arp(bond);
+
+		/* when bonding does not set the slave MAC address, the bond MAC
+		 * address is the one of the active slave.
+		 */
+		if (new_active && bond->params.fail_over_mac)
+			memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr,
+				new_active->dev->addr_len);
+		if (bond->curr_active_slave &&
+			test_bit(__LINK_STATE_LINKWATCH_PENDING,
+					&bond->curr_active_slave->dev->state)) {
+			dprintk("delaying gratuitous arp on %s\n",
+				bond->curr_active_slave->dev->name);
+			bond->send_grat_arp = 1;
+		} else
+			bond_send_gratuitous_arp(bond);
 	}
 }
 
@@ -1217,7 +1234,8 @@
 	struct slave *slave;
 	struct net_device *bond_dev = bond->dev;
 	unsigned long features = bond_dev->features;
-	unsigned short max_hard_header_len = ETH_HLEN;
+	unsigned short max_hard_header_len = max((u16)ETH_HLEN,
+						bond_dev->hard_header_len);
 	int i;
 
 	features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
@@ -1238,6 +1256,23 @@
 	return 0;
 }
 
+
+static void bond_setup_by_slave(struct net_device *bond_dev,
+				struct net_device *slave_dev)
+{
+	struct bonding *bond = bond_dev->priv;
+
+	bond_dev->neigh_setup           = slave_dev->neigh_setup;
+
+	bond_dev->type		    = slave_dev->type;
+	bond_dev->hard_header_len   = slave_dev->hard_header_len;
+	bond_dev->addr_len	    = slave_dev->addr_len;
+
+	memcpy(bond_dev->broadcast, slave_dev->broadcast,
+		slave_dev->addr_len);
+	bond->setup_by_slave = 1;
+}
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 {
@@ -1258,8 +1293,9 @@
 
 	/* bond must be initialized by bond_open() before enslaving */
 	if (!(bond_dev->flags & IFF_UP)) {
-		dprintk("Error, master_dev is not up\n");
-		return -EPERM;
+		printk(KERN_WARNING DRV_NAME
+			" %s: master_dev is not up in bond_enslave\n",
+			bond_dev->name);
 	}
 
 	/* already enslaved */
@@ -1312,14 +1348,42 @@
 		goto err_undo_flags;
 	}
 
+	/* set bonding device ether type by slave - bonding netdevices are
+	 * created with ether_setup, so when the slave type is not ARPHRD_ETHER
+	 * there is a need to override some of the type dependent attribs/funcs.
+	 *
+	 * bond ether type mutual exclusion - don't allow slaves of dissimilar
+	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
+	 */
+	if (bond->slave_cnt == 0) {
+		if (slave_dev->type != ARPHRD_ETHER)
+			bond_setup_by_slave(bond_dev, slave_dev);
+	} else if (bond_dev->type != slave_dev->type) {
+		printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different "
+			"from other slaves (%d), can not enslave it.\n",
+			slave_dev->name,
+			slave_dev->type, bond_dev->type);
+			res = -EINVAL;
+			goto err_undo_flags;
+	}
+
 	if (slave_dev->set_mac_address == NULL) {
-		printk(KERN_ERR DRV_NAME
-			": %s: Error: The slave device you specified does "
-			"not support setting the MAC address. "
-			"Your kernel likely does not support slave "
-			"devices.\n", bond_dev->name);
-  		res = -EOPNOTSUPP;
-		goto err_undo_flags;
+		if (bond->slave_cnt == 0) {
+			printk(KERN_WARNING DRV_NAME
+			       ": %s: Warning: The first slave device "
+			       "specified does not support setting the MAC "
+			       "address. Enabling the fail_over_mac option.",
+			       bond_dev->name);
+			bond->params.fail_over_mac = 1;
+		} else if (!bond->params.fail_over_mac) {
+			printk(KERN_ERR DRV_NAME
+				": %s: Error: The slave device specified "
+				"does not support setting the MAC address, "
+				"but fail_over_mac is not enabled.\n"
+				, bond_dev->name);
+			res = -EOPNOTSUPP;
+			goto err_undo_flags;
+		}
 	}
 
 	new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
@@ -1340,16 +1404,18 @@
 	 */
 	memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
 
-	/*
-	 * Set slave to master's mac address.  The application already
-	 * set the master's mac address to that of the first slave
-	 */
-	memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
-	addr.sa_family = slave_dev->type;
-	res = dev_set_mac_address(slave_dev, &addr);
-	if (res) {
-		dprintk("Error %d calling set_mac_address\n", res);
-		goto err_free;
+	if (!bond->params.fail_over_mac) {
+		/*
+		 * Set slave to master's mac address.  The application already
+		 * set the master's mac address to that of the first slave
+		 */
+		memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
+		addr.sa_family = slave_dev->type;
+		res = dev_set_mac_address(slave_dev, &addr);
+		if (res) {
+			dprintk("Error %d calling set_mac_address\n", res);
+			goto err_free;
+		}
 	}
 
 	res = netdev_set_master(slave_dev, bond_dev);
@@ -1574,9 +1640,11 @@
 	dev_close(slave_dev);
 
 err_restore_mac:
-	memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
-	addr.sa_family = slave_dev->type;
-	dev_set_mac_address(slave_dev, &addr);
+	if (!bond->params.fail_over_mac) {
+		memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
+		addr.sa_family = slave_dev->type;
+		dev_set_mac_address(slave_dev, &addr);
+	}
 
 err_free:
 	kfree(new_slave);
@@ -1749,10 +1817,12 @@
 	/* close slave before restoring its mac address */
 	dev_close(slave_dev);
 
-	/* restore original ("permanent") mac address */
-	memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
-	addr.sa_family = slave_dev->type;
-	dev_set_mac_address(slave_dev, &addr);
+	if (!bond->params.fail_over_mac) {
+		/* restore original ("permanent") mac address */
+		memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
+		addr.sa_family = slave_dev->type;
+		dev_set_mac_address(slave_dev, &addr);
+	}
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
 				   IFF_SLAVE_INACTIVE | IFF_BONDING |
@@ -1764,6 +1834,35 @@
 }
 
 /*
+* Destroy a bonding device.
+* Must be under rtnl_lock when this function is called.
+*/
+void bond_destroy(struct bonding *bond)
+{
+	bond_deinit(bond->dev);
+	bond_destroy_sysfs_entry(bond);
+	unregister_netdevice(bond->dev);
+}
+
+/*
+* First release a slave and than destroy the bond if no more slaves iare left.
+* Must be under rtnl_lock when this function is called.
+*/
+int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
+{
+	struct bonding *bond = bond_dev->priv;
+	int ret;
+
+	ret = bond_release(bond_dev, slave_dev);
+	if ((ret == 0) && (bond->slave_cnt == 0)) {
+		printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n",
+		       bond_dev->name, bond_dev->name);
+		bond_destroy(bond);
+	}
+	return ret;
+}
+
+/*
  * This function releases all slaves.
  */
 static int bond_release_all(struct net_device *bond_dev)
@@ -1839,10 +1938,12 @@
 		/* close slave before restoring its mac address */
 		dev_close(slave_dev);
 
-		/* restore original ("permanent") mac address*/
-		memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
-		addr.sa_family = slave_dev->type;
-		dev_set_mac_address(slave_dev, &addr);
+		if (!bond->params.fail_over_mac) {
+			/* restore original ("permanent") mac address*/
+			memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
+			addr.sa_family = slave_dev->type;
+			dev_set_mac_address(slave_dev, &addr);
+		}
 
 		slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
 					   IFF_SLAVE_INACTIVE);
@@ -2013,6 +2114,17 @@
 	 * program could monitor the link itself if needed.
 	 */
 
+	if (bond->send_grat_arp) {
+		if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING,
+				&bond->curr_active_slave->dev->state))
+			dprintk("Needs to send gratuitous arp but not yet\n");
+		else {
+			dprintk("sending delayed gratuitous arp on on %s\n",
+				bond->curr_active_slave->dev->name);
+			bond_send_gratuitous_arp(bond);
+			bond->send_grat_arp = 0;
+		}
+	}
 	read_lock(&bond->curr_slave_lock);
 	oldcurrent = bond->curr_active_slave;
 	read_unlock(&bond->curr_slave_lock);
@@ -2414,7 +2526,7 @@
 
 	if (bond->master_ip) {
 		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
-				  bond->master_ip, 0);
+				bond->master_ip, 0);
 	}
 
 	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -2951,9 +3063,15 @@
 	curr = bond->curr_active_slave;
 	read_unlock(&bond->curr_slave_lock);
 
-	seq_printf(seq, "Bonding Mode: %s\n",
+	seq_printf(seq, "Bonding Mode: %s",
 		   bond_mode_name(bond->params.mode));
 
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
+	    bond->params.fail_over_mac)
+		seq_printf(seq, " (fail_over_mac)");
+
+	seq_printf(seq, "\n");
+
 	if (bond->params.mode == BOND_MODE_XOR ||
 		bond->params.mode == BOND_MODE_8023AD) {
 		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
@@ -3248,6 +3366,11 @@
 		 * ... Or is it this?
 		 */
 		break;
+	case NETDEV_GOING_DOWN:
+		dprintk("slave %s is going down\n", slave_dev->name);
+		if (bond->setup_by_slave)
+			bond_release_and_destroy(bond_dev, slave_dev);
+		break;
 	case NETDEV_CHANGEMTU:
 		/*
 		 * TODO: Should slaves be allowed to
@@ -3880,6 +4003,13 @@
 
 	dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
 
+	/*
+	 * If fail_over_mac is enabled, do nothing and return success.
+	 * Returning an error causes ifenslave to fail.
+	 */
+	if (bond->params.fail_over_mac)
+		return 0;
+
 	if (!is_valid_ether_addr(sa->sa_data)) {
 		return -EADDRNOTAVAIL;
 	}
@@ -4217,6 +4347,8 @@
 	bond->current_arp_slave = NULL;
 	bond->primary_slave = NULL;
 	bond->dev = bond_dev;
+	bond->send_grat_arp = 0;
+	bond->setup_by_slave = 0;
 	INIT_LIST_HEAD(&bond->vlan_list);
 
 	/* Initialize the device entry points */
@@ -4265,7 +4397,6 @@
 #ifdef CONFIG_PROC_FS
 	bond_create_proc_entry(bond);
 #endif
-
 	list_add_tail(&bond->bond_list, &bond_dev_list);
 
 	return 0;
@@ -4599,6 +4730,11 @@
 		primary = NULL;
 	}
 
+	if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP))
+		printk(KERN_WARNING DRV_NAME
+		       ": Warning: fail_over_mac only affects "
+		       "active-backup mode.\n");
+
 	/* fill params struct with the proper values */
 	params->mode = bond_mode;
 	params->xmit_policy = xmit_hashtype;
@@ -4610,6 +4746,7 @@
 	params->use_carrier = use_carrier;
 	params->lacp_fast = lacp_fast;
 	params->primary[0] = 0;
+	params->fail_over_mac = fail_over_mac;
 
 	if (primary) {
 		strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 6f49ca7..80c0c8c4 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -164,9 +164,7 @@
 				printk(KERN_INFO DRV_NAME
 					": %s is being deleted...\n",
 					bond->dev->name);
-				bond_deinit(bond->dev);
-		        	bond_destroy_sysfs_entry(bond);
-				unregister_netdevice(bond->dev);
+				bond_destroy(bond);
 				rtnl_unlock();
 				goto out;
 			}
@@ -260,17 +258,16 @@
 	char command[IFNAMSIZ + 1] = { 0, };
 	char *ifname;
 	int i, res, found, ret = count;
+	u32 original_mtu;
 	struct slave *slave;
 	struct net_device *dev = NULL;
 	struct bonding *bond = to_bond(d);
 
 	/* Quick sanity check -- is the bond interface up? */
 	if (!(bond->dev->flags & IFF_UP)) {
-		printk(KERN_ERR DRV_NAME
-		       ": %s: Unable to update slaves because interface is down.\n",
+		printk(KERN_WARNING DRV_NAME
+		       ": %s: doing slave updates when interface is down.\n",
 		       bond->dev->name);
-		ret = -EPERM;
-		goto out;
 	}
 
 	/* Note:  We can't hold bond->lock here, as bond_create grabs it. */
@@ -327,6 +324,7 @@
 		}
 
 		/* Set the slave's MTU to match the bond */
+		original_mtu = dev->mtu;
 		if (dev->mtu != bond->dev->mtu) {
 			if (dev->change_mtu) {
 				res = dev->change_mtu(dev,
@@ -341,6 +339,9 @@
 		}
 		rtnl_lock();
 		res = bond_enslave(bond->dev, dev);
+		bond_for_each_slave(bond, slave, i)
+			if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0)
+				slave->original_mtu = original_mtu;
 		rtnl_unlock();
 		if (res) {
 			ret = res;
@@ -353,13 +354,17 @@
 		bond_for_each_slave(bond, slave, i)
 			if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
 				dev = slave->dev;
+				original_mtu = slave->original_mtu;
 				break;
 			}
 		if (dev) {
 			printk(KERN_INFO DRV_NAME ": %s: Removing slave %s\n",
 				bond->dev->name, dev->name);
 			rtnl_lock();
-			res = bond_release(bond->dev, dev);
+			if (bond->setup_by_slave)
+				res = bond_release_and_destroy(bond->dev, dev);
+			else
+				res = bond_release(bond->dev, dev);
 			rtnl_unlock();
 			if (res) {
 				ret = res;
@@ -367,9 +372,9 @@
 			}
 			/* set the slave MTU to the default */
 			if (dev->change_mtu) {
-				dev->change_mtu(dev, 1500);
+				dev->change_mtu(dev, original_mtu);
 			} else {
-				dev->mtu = 1500;
+				dev->mtu = original_mtu;
 			}
 		}
 		else {
@@ -563,6 +568,54 @@
 static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate);
 
 /*
+ * Show and store fail_over_mac.  User only allowed to change the
+ * value when there are no slaves.
+ */
+static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.fail_over_mac) + 1;
+}
+
+static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count)
+{
+	int new_value;
+	int ret = count;
+	struct bonding *bond = to_bond(d);
+
+	if (bond->slave_cnt != 0) {
+		printk(KERN_ERR DRV_NAME
+		       ": %s: Can't alter fail_over_mac with slaves in bond.\n",
+		       bond->dev->name);
+		ret = -EPERM;
+		goto out;
+	}
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		printk(KERN_ERR DRV_NAME
+		       ": %s: no fail_over_mac value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if ((new_value == 0) || (new_value == 1)) {
+		bond->params.fail_over_mac = new_value;
+		printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %d.\n",
+		       bond->dev->name, new_value);
+	} else {
+		printk(KERN_INFO DRV_NAME
+		       ": %s: Ignoring invalid fail_over_mac value %d.\n",
+		       bond->dev->name, new_value);
+	}
+out:
+	return ret;
+}
+
+static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac);
+
+/*
  * Show and set the arp timer interval.  There are two tricky bits
  * here.  First, if ARP monitoring is activated, then we must disable
  * MII monitoring.  Second, if the ARP timer isn't running, we must
@@ -1383,6 +1436,7 @@
 static struct attribute *per_bond_attrs[] = {
 	&dev_attr_slaves.attr,
 	&dev_attr_mode.attr,
+	&dev_attr_fail_over_mac.attr,
 	&dev_attr_arp_validate.attr,
 	&dev_attr_arp_interval.attr,
 	&dev_attr_arp_ip_target.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 2a6af7d..a8bbd56 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -22,8 +22,8 @@
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
-#define DRV_VERSION	"3.1.3"
-#define DRV_RELDATE	"June 13, 2007"
+#define DRV_VERSION	"3.2.0"
+#define DRV_RELDATE	"September 13, 2007"
 #define DRV_NAME	"bonding"
 #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"
 
@@ -128,6 +128,7 @@
 	int arp_interval;
 	int arp_validate;
 	int use_carrier;
+	int fail_over_mac;
 	int updelay;
 	int downdelay;
 	int lacp_fast;
@@ -156,6 +157,7 @@
 	s8     link;    /* one of BOND_LINK_XXXX */
 	s8     state;   /* one of BOND_STATE_XXXX */
 	u32    original_flags;
+	u32    original_mtu;
 	u32    link_failure_count;
 	u16    speed;
 	u8     duplex;
@@ -185,6 +187,8 @@
 	struct   timer_list mii_timer;
 	struct   timer_list arp_timer;
 	s8       kill_timers;
+	s8	 send_grat_arp;
+	s8	 setup_by_slave;
 	struct   net_device_stats stats;
 #ifdef CONFIG_PROC_FS
 	struct   proc_dir_entry *proc_entry;
@@ -292,6 +296,8 @@
 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 int bond_create(char *name, struct bond_params *params, struct bonding **newbond);
+void bond_destroy(struct bonding *bond);
+int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev);
 void bond_deinit(struct net_device *bond_dev);
 int bond_create_sysfs(void);
 void bond_destroy_sysfs(void);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 563bf5f..7df31b5 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -4443,7 +4443,7 @@
 	{REG_MAC_COLL_EXCESS},
 	{REG_MAC_COLL_LATE}
 };
-#define CAS_REG_LEN 	(sizeof(ethtool_register_table)/sizeof(int))
+#define CAS_REG_LEN 	ARRAY_SIZE(ethtool_register_table)
 #define CAS_MAX_REGS 	(sizeof (u32)*CAS_REG_LEN)
 
 static void cas_read_regs(struct cas *cp, u8 *ptr, int len)
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
new file mode 100644
index 0000000..ed53aaa
--- /dev/null
+++ b/drivers/net/cpmac.c
@@ -0,0 +1,1174 @@
+/*
+ * Copyright (C) 2006, 2007 Eugene Konev
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <asm/gpio.h>
+
+MODULE_AUTHOR("Eugene Konev <ejka@imfi.kspu.ru>");
+MODULE_DESCRIPTION("TI AR7 ethernet driver (CPMAC)");
+MODULE_LICENSE("GPL");
+
+static int debug_level = 8;
+static int dumb_switch;
+
+/* Next 2 are only used in cpmac_probe, so it's pointless to change them */
+module_param(debug_level, int, 0444);
+module_param(dumb_switch, int, 0444);
+
+MODULE_PARM_DESC(debug_level, "Number of NETIF_MSG bits to enable");
+MODULE_PARM_DESC(dumb_switch, "Assume switch is not connected to MDIO bus");
+
+#define CPMAC_VERSION "0.5.0"
+/* stolen from net/ieee80211.h */
+#ifndef MAC_FMT
+#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define MAC_ARG(x) ((u8*)(x))[0], ((u8*)(x))[1], ((u8*)(x))[2], \
+		   ((u8*)(x))[3], ((u8*)(x))[4], ((u8*)(x))[5]
+#endif
+/* frame size + 802.1q tag */
+#define CPMAC_SKB_SIZE		(ETH_FRAME_LEN + 4)
+#define CPMAC_QUEUES	8
+
+/* Ethernet registers */
+#define CPMAC_TX_CONTROL		0x0004
+#define CPMAC_TX_TEARDOWN		0x0008
+#define CPMAC_RX_CONTROL		0x0014
+#define CPMAC_RX_TEARDOWN		0x0018
+#define CPMAC_MBP			0x0100
+# define MBP_RXPASSCRC			0x40000000
+# define MBP_RXQOS			0x20000000
+# define MBP_RXNOCHAIN			0x10000000
+# define MBP_RXCMF			0x01000000
+# define MBP_RXSHORT			0x00800000
+# define MBP_RXCEF			0x00400000
+# define MBP_RXPROMISC			0x00200000
+# define MBP_PROMISCCHAN(channel)	(((channel) & 0x7) << 16)
+# define MBP_RXBCAST			0x00002000
+# define MBP_BCASTCHAN(channel)		(((channel) & 0x7) << 8)
+# define MBP_RXMCAST			0x00000020
+# define MBP_MCASTCHAN(channel)		((channel) & 0x7)
+#define CPMAC_UNICAST_ENABLE		0x0104
+#define CPMAC_UNICAST_CLEAR		0x0108
+#define CPMAC_MAX_LENGTH		0x010c
+#define CPMAC_BUFFER_OFFSET		0x0110
+#define CPMAC_MAC_CONTROL		0x0160
+# define MAC_TXPTYPE			0x00000200
+# define MAC_TXPACE			0x00000040
+# define MAC_MII			0x00000020
+# define MAC_TXFLOW			0x00000010
+# define MAC_RXFLOW			0x00000008
+# define MAC_MTEST			0x00000004
+# define MAC_LOOPBACK			0x00000002
+# define MAC_FDX			0x00000001
+#define CPMAC_MAC_STATUS		0x0164
+# define MAC_STATUS_QOS			0x00000004
+# define MAC_STATUS_RXFLOW		0x00000002
+# define MAC_STATUS_TXFLOW		0x00000001
+#define CPMAC_TX_INT_ENABLE		0x0178
+#define CPMAC_TX_INT_CLEAR		0x017c
+#define CPMAC_MAC_INT_VECTOR		0x0180
+# define MAC_INT_STATUS			0x00080000
+# define MAC_INT_HOST			0x00040000
+# define MAC_INT_RX			0x00020000
+# define MAC_INT_TX			0x00010000
+#define CPMAC_MAC_EOI_VECTOR		0x0184
+#define CPMAC_RX_INT_ENABLE		0x0198
+#define CPMAC_RX_INT_CLEAR		0x019c
+#define CPMAC_MAC_INT_ENABLE		0x01a8
+#define CPMAC_MAC_INT_CLEAR		0x01ac
+#define CPMAC_MAC_ADDR_LO(channel) 	(0x01b0 + (channel) * 4)
+#define CPMAC_MAC_ADDR_MID		0x01d0
+#define CPMAC_MAC_ADDR_HI		0x01d4
+#define CPMAC_MAC_HASH_LO		0x01d8
+#define CPMAC_MAC_HASH_HI		0x01dc
+#define CPMAC_TX_PTR(channel)		(0x0600 + (channel) * 4)
+#define CPMAC_RX_PTR(channel)		(0x0620 + (channel) * 4)
+#define CPMAC_TX_ACK(channel)		(0x0640 + (channel) * 4)
+#define CPMAC_RX_ACK(channel)		(0x0660 + (channel) * 4)
+#define CPMAC_REG_END			0x0680
+/*
+ * Rx/Tx statistics
+ * TODO: use some of them to fill stats in cpmac_stats()
+ */
+#define CPMAC_STATS_RX_GOOD		0x0200
+#define CPMAC_STATS_RX_BCAST		0x0204
+#define CPMAC_STATS_RX_MCAST		0x0208
+#define CPMAC_STATS_RX_PAUSE		0x020c
+#define CPMAC_STATS_RX_CRC		0x0210
+#define CPMAC_STATS_RX_ALIGN		0x0214
+#define CPMAC_STATS_RX_OVER		0x0218
+#define CPMAC_STATS_RX_JABBER		0x021c
+#define CPMAC_STATS_RX_UNDER		0x0220
+#define CPMAC_STATS_RX_FRAG		0x0224
+#define CPMAC_STATS_RX_FILTER		0x0228
+#define CPMAC_STATS_RX_QOSFILTER	0x022c
+#define CPMAC_STATS_RX_OCTETS		0x0230
+
+#define CPMAC_STATS_TX_GOOD		0x0234
+#define CPMAC_STATS_TX_BCAST		0x0238
+#define CPMAC_STATS_TX_MCAST		0x023c
+#define CPMAC_STATS_TX_PAUSE		0x0240
+#define CPMAC_STATS_TX_DEFER		0x0244
+#define CPMAC_STATS_TX_COLLISION	0x0248
+#define CPMAC_STATS_TX_SINGLECOLL	0x024c
+#define CPMAC_STATS_TX_MULTICOLL	0x0250
+#define CPMAC_STATS_TX_EXCESSCOLL	0x0254
+#define CPMAC_STATS_TX_LATECOLL		0x0258
+#define CPMAC_STATS_TX_UNDERRUN		0x025c
+#define CPMAC_STATS_TX_CARRIERSENSE	0x0260
+#define CPMAC_STATS_TX_OCTETS		0x0264
+
+#define cpmac_read(base, reg)		(readl((void __iomem *)(base) + (reg)))
+#define cpmac_write(base, reg, val)	(writel(val, (void __iomem *)(base) + \
+						(reg)))
+
+/* MDIO bus */
+#define CPMAC_MDIO_VERSION		0x0000
+#define CPMAC_MDIO_CONTROL		0x0004
+# define MDIOC_IDLE			0x80000000
+# define MDIOC_ENABLE			0x40000000
+# define MDIOC_PREAMBLE			0x00100000
+# define MDIOC_FAULT			0x00080000
+# define MDIOC_FAULTDETECT		0x00040000
+# define MDIOC_INTTEST			0x00020000
+# define MDIOC_CLKDIV(div)		((div) & 0xff)
+#define CPMAC_MDIO_ALIVE		0x0008
+#define CPMAC_MDIO_LINK			0x000c
+#define CPMAC_MDIO_ACCESS(channel)	(0x0080 + (channel) * 8)
+# define MDIO_BUSY			0x80000000
+# define MDIO_WRITE			0x40000000
+# define MDIO_REG(reg)			(((reg) & 0x1f) << 21)
+# define MDIO_PHY(phy)			(((phy) & 0x1f) << 16)
+# define MDIO_DATA(data)		((data) & 0xffff)
+#define CPMAC_MDIO_PHYSEL(channel)	(0x0084 + (channel) * 8)
+# define PHYSEL_LINKSEL			0x00000040
+# define PHYSEL_LINKINT			0x00000020
+
+struct cpmac_desc {
+	u32 hw_next;
+	u32 hw_data;
+	u16 buflen;
+	u16 bufflags;
+	u16 datalen;
+	u16 dataflags;
+#define CPMAC_SOP			0x8000
+#define CPMAC_EOP			0x4000
+#define CPMAC_OWN			0x2000
+#define CPMAC_EOQ			0x1000
+	struct sk_buff *skb;
+	struct cpmac_desc *next;
+	dma_addr_t mapping;
+	dma_addr_t data_mapping;
+};
+
+struct cpmac_priv {
+	spinlock_t lock;
+	spinlock_t rx_lock;
+	struct cpmac_desc *rx_head;
+	int ring_size;
+	struct cpmac_desc *desc_ring;
+	dma_addr_t dma_ring;
+	void __iomem *regs;
+	struct mii_bus *mii_bus;
+	struct phy_device *phy;
+	char phy_name[BUS_ID_SIZE];
+	int oldlink, oldspeed, oldduplex;
+	u32 msg_enable;
+	struct net_device *dev;
+	struct work_struct reset_work;
+	struct platform_device *pdev;
+};
+
+static irqreturn_t cpmac_irq(int, void *);
+static void cpmac_hw_start(struct net_device *dev);
+static void cpmac_hw_stop(struct net_device *dev);
+static int cpmac_stop(struct net_device *dev);
+static int cpmac_open(struct net_device *dev);
+
+static void cpmac_dump_regs(struct net_device *dev)
+{
+	int i;
+	struct cpmac_priv *priv = netdev_priv(dev);
+	for (i = 0; i < CPMAC_REG_END; i += 4) {
+		if (i % 16 == 0) {
+			if (i)
+				printk("\n");
+			printk(KERN_DEBUG "%s: reg[%p]:", dev->name,
+			       priv->regs + i);
+		}
+		printk(" %08x", cpmac_read(priv->regs, i));
+	}
+	printk("\n");
+}
+
+static void cpmac_dump_desc(struct net_device *dev, struct cpmac_desc *desc)
+{
+	int i;
+	printk(KERN_DEBUG "%s: desc[%p]:", dev->name, desc);
+	for (i = 0; i < sizeof(*desc) / 4; i++)
+		printk(" %08x", ((u32 *)desc)[i]);
+	printk("\n");
+}
+
+static void cpmac_dump_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	int i;
+	printk(KERN_DEBUG "%s: skb 0x%p, len=%d\n", dev->name, skb, skb->len);
+	for (i = 0; i < skb->len; i++) {
+		if (i % 16 == 0) {
+			if (i)
+				printk("\n");
+			printk(KERN_DEBUG "%s: data[%p]:", dev->name,
+			       skb->data + i);
+		}
+		printk(" %02x", ((u8 *)skb->data)[i]);
+	}
+	printk("\n");
+}
+
+static int cpmac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+	u32 val;
+
+	while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY)
+		cpu_relax();
+	cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_REG(reg) |
+		    MDIO_PHY(phy_id));
+	while ((val = cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0))) & MDIO_BUSY)
+		cpu_relax();
+	return MDIO_DATA(val);
+}
+
+static int cpmac_mdio_write(struct mii_bus *bus, int phy_id,
+			    int reg, u16 val)
+{
+	while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY)
+		cpu_relax();
+	cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_WRITE |
+		    MDIO_REG(reg) | MDIO_PHY(phy_id) | MDIO_DATA(val));
+	return 0;
+}
+
+static int cpmac_mdio_reset(struct mii_bus *bus)
+{
+	ar7_device_reset(AR7_RESET_BIT_MDIO);
+	cpmac_write(bus->priv, CPMAC_MDIO_CONTROL, MDIOC_ENABLE |
+		    MDIOC_CLKDIV(ar7_cpmac_freq() / 2200000 - 1));
+	return 0;
+}
+
+static int mii_irqs[PHY_MAX_ADDR] = { PHY_POLL, };
+
+static struct mii_bus cpmac_mii = {
+	.name = "cpmac-mii",
+	.read = cpmac_mdio_read,
+	.write = cpmac_mdio_write,
+	.reset = cpmac_mdio_reset,
+	.irq = mii_irqs,
+};
+
+static int cpmac_config(struct net_device *dev, struct ifmap *map)
+{
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	/* Don't allow changing the I/O address */
+	if (map->base_addr != dev->base_addr)
+		return -EOPNOTSUPP;
+
+	/* ignore other fields */
+	return 0;
+}
+
+static void cpmac_set_multicast_list(struct net_device *dev)
+{
+	struct dev_mc_list *iter;
+	int i;
+	u8 tmp;
+	u32 mbp, bit, hash[2] = { 0, };
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	mbp = cpmac_read(priv->regs, CPMAC_MBP);
+	if (dev->flags & IFF_PROMISC) {
+		cpmac_write(priv->regs, CPMAC_MBP, (mbp & ~MBP_PROMISCCHAN(0)) |
+			    MBP_RXPROMISC);
+	} else {
+		cpmac_write(priv->regs, CPMAC_MBP, mbp & ~MBP_RXPROMISC);
+		if (dev->flags & IFF_ALLMULTI) {
+			/* enable all multicast mode */
+			cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, 0xffffffff);
+			cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, 0xffffffff);
+		} else {
+			/*
+			 * cpmac uses some strange mac address hashing
+			 * (not crc32)
+			 */
+			for (i = 0, iter = dev->mc_list; i < dev->mc_count;
+			     i++, iter = iter->next) {
+				bit = 0;
+				tmp = iter->dmi_addr[0];
+				bit  ^= (tmp >> 2) ^ (tmp << 4);
+				tmp = iter->dmi_addr[1];
+				bit  ^= (tmp >> 4) ^ (tmp << 2);
+				tmp = iter->dmi_addr[2];
+				bit  ^= (tmp >> 6) ^ tmp;
+				tmp = iter->dmi_addr[3];
+				bit  ^= (tmp >> 2) ^ (tmp << 4);
+				tmp = iter->dmi_addr[4];
+				bit  ^= (tmp >> 4) ^ (tmp << 2);
+				tmp = iter->dmi_addr[5];
+				bit  ^= (tmp >> 6) ^ tmp;
+				bit &= 0x3f;
+				hash[bit / 32] |= 1 << (bit % 32);
+			}
+
+			cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, hash[0]);
+			cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, hash[1]);
+		}
+	}
+}
+
+static struct sk_buff *cpmac_rx_one(struct net_device *dev,
+				    struct cpmac_priv *priv,
+				    struct cpmac_desc *desc)
+{
+	struct sk_buff *skb, *result = NULL;
+
+	if (unlikely(netif_msg_hw(priv)))
+		cpmac_dump_desc(dev, desc);
+	cpmac_write(priv->regs, CPMAC_RX_ACK(0), (u32)desc->mapping);
+	if (unlikely(!desc->datalen)) {
+		if (netif_msg_rx_err(priv) && net_ratelimit())
+			printk(KERN_WARNING "%s: rx: spurious interrupt\n",
+			       dev->name);
+		return NULL;
+	}
+
+	skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE);
+	if (likely(skb)) {
+		skb_reserve(skb, 2);
+		skb_put(desc->skb, desc->datalen);
+		desc->skb->protocol = eth_type_trans(desc->skb, dev);
+		desc->skb->ip_summed = CHECKSUM_NONE;
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += desc->datalen;
+		result = desc->skb;
+		dma_unmap_single(&dev->dev, desc->data_mapping, CPMAC_SKB_SIZE,
+				 DMA_FROM_DEVICE);
+		desc->skb = skb;
+		desc->data_mapping = dma_map_single(&dev->dev, skb->data,
+						    CPMAC_SKB_SIZE,
+						    DMA_FROM_DEVICE);
+		desc->hw_data = (u32)desc->data_mapping;
+		if (unlikely(netif_msg_pktdata(priv))) {
+			printk(KERN_DEBUG "%s: received packet:\n", dev->name);
+			cpmac_dump_skb(dev, result);
+		}
+	} else {
+		if (netif_msg_rx_err(priv) && net_ratelimit())
+			printk(KERN_WARNING
+			       "%s: low on skbs, dropping packet\n", dev->name);
+		dev->stats.rx_dropped++;
+	}
+
+	desc->buflen = CPMAC_SKB_SIZE;
+	desc->dataflags = CPMAC_OWN;
+
+	return result;
+}
+
+static int cpmac_poll(struct net_device *dev, int *budget)
+{
+	struct sk_buff *skb;
+	struct cpmac_desc *desc;
+	int received = 0, quota = min(dev->quota, *budget);
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	spin_lock(&priv->rx_lock);
+	if (unlikely(!priv->rx_head)) {
+		if (netif_msg_rx_err(priv) && net_ratelimit())
+			printk(KERN_WARNING "%s: rx: polling, but no queue\n",
+			       dev->name);
+		netif_rx_complete(dev);
+		return 0;
+	}
+
+	desc = priv->rx_head;
+	while ((received < quota) && ((desc->dataflags & CPMAC_OWN) == 0)) {
+		skb = cpmac_rx_one(dev, priv, desc);
+		if (likely(skb)) {
+			netif_receive_skb(skb);
+			received++;
+		}
+		desc = desc->next;
+	}
+
+	priv->rx_head = desc;
+	spin_unlock(&priv->rx_lock);
+	*budget -= received;
+	dev->quota -= received;
+	if (unlikely(netif_msg_rx_status(priv)))
+		printk(KERN_DEBUG "%s: poll processed %d packets\n", dev->name,
+		       received);
+	if (desc->dataflags & CPMAC_OWN) {
+		netif_rx_complete(dev);
+		cpmac_write(priv->regs, CPMAC_RX_PTR(0), (u32)desc->mapping);
+		cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	int queue, len;
+	struct cpmac_desc *desc;
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	if (unlikely(skb_padto(skb, ETH_ZLEN))) {
+		if (netif_msg_tx_err(priv) && net_ratelimit())
+			printk(KERN_WARNING
+			       "%s: tx: padding failed, dropping\n", dev->name);
+		spin_lock(&priv->lock);
+		dev->stats.tx_dropped++;
+		spin_unlock(&priv->lock);
+		return -ENOMEM;
+	}
+
+	len = max(skb->len, ETH_ZLEN);
+	queue = skb->queue_mapping;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	netif_stop_subqueue(dev, queue);
+#else
+	netif_stop_queue(dev);
+#endif
+
+	desc = &priv->desc_ring[queue];
+	if (unlikely(desc->dataflags & CPMAC_OWN)) {
+		if (netif_msg_tx_err(priv) && net_ratelimit())
+			printk(KERN_WARNING "%s: tx dma ring full, dropping\n",
+			       dev->name);
+		spin_lock(&priv->lock);
+		dev->stats.tx_dropped++;
+		spin_unlock(&priv->lock);
+		dev_kfree_skb_any(skb);
+		return -ENOMEM;
+	}
+
+	spin_lock(&priv->lock);
+	dev->trans_start = jiffies;
+	spin_unlock(&priv->lock);
+	desc->dataflags = CPMAC_SOP | CPMAC_EOP | CPMAC_OWN;
+	desc->skb = skb;
+	desc->data_mapping = dma_map_single(&dev->dev, skb->data, len,
+					    DMA_TO_DEVICE);
+	desc->hw_data = (u32)desc->data_mapping;
+	desc->datalen = len;
+	desc->buflen = len;
+	if (unlikely(netif_msg_tx_queued(priv)))
+		printk(KERN_DEBUG "%s: sending 0x%p, len=%d\n", dev->name, skb,
+		       skb->len);
+	if (unlikely(netif_msg_hw(priv)))
+		cpmac_dump_desc(dev, desc);
+	if (unlikely(netif_msg_pktdata(priv)))
+		cpmac_dump_skb(dev, skb);
+	cpmac_write(priv->regs, CPMAC_TX_PTR(queue), (u32)desc->mapping);
+
+	return 0;
+}
+
+static void cpmac_end_xmit(struct net_device *dev, int queue)
+{
+	struct cpmac_desc *desc;
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	desc = &priv->desc_ring[queue];
+	cpmac_write(priv->regs, CPMAC_TX_ACK(queue), (u32)desc->mapping);
+	if (likely(desc->skb)) {
+		spin_lock(&priv->lock);
+		dev->stats.tx_packets++;
+		dev->stats.tx_bytes += desc->skb->len;
+		spin_unlock(&priv->lock);
+		dma_unmap_single(&dev->dev, desc->data_mapping, desc->skb->len,
+				 DMA_TO_DEVICE);
+
+		if (unlikely(netif_msg_tx_done(priv)))
+			printk(KERN_DEBUG "%s: sent 0x%p, len=%d\n", dev->name,
+			       desc->skb, desc->skb->len);
+
+		dev_kfree_skb_irq(desc->skb);
+		desc->skb = NULL;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+		if (netif_subqueue_stopped(dev, queue))
+			netif_wake_subqueue(dev, queue);
+#else
+		if (netif_queue_stopped(dev))
+			netif_wake_queue(dev);
+#endif
+	} else {
+		if (netif_msg_tx_err(priv) && net_ratelimit())
+			printk(KERN_WARNING
+			       "%s: end_xmit: spurious interrupt\n", dev->name);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+		if (netif_subqueue_stopped(dev, queue))
+			netif_wake_subqueue(dev, queue);
+#else
+		if (netif_queue_stopped(dev))
+			netif_wake_queue(dev);
+#endif
+	}
+}
+
+static void cpmac_hw_stop(struct net_device *dev)
+{
+	int i;
+	struct cpmac_priv *priv = netdev_priv(dev);
+	struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data;
+
+	ar7_device_reset(pdata->reset_bit);
+	cpmac_write(priv->regs, CPMAC_RX_CONTROL,
+		    cpmac_read(priv->regs, CPMAC_RX_CONTROL) & ~1);
+	cpmac_write(priv->regs, CPMAC_TX_CONTROL,
+		    cpmac_read(priv->regs, CPMAC_TX_CONTROL) & ~1);
+	for (i = 0; i < 8; i++) {
+		cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0);
+		cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0);
+	}
+	cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_MAC_CONTROL,
+		    cpmac_read(priv->regs, CPMAC_MAC_CONTROL) & ~MAC_MII);
+}
+
+static void cpmac_hw_start(struct net_device *dev)
+{
+	int i;
+	struct cpmac_priv *priv = netdev_priv(dev);
+	struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data;
+
+	ar7_device_reset(pdata->reset_bit);
+	for (i = 0; i < 8; i++) {
+		cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0);
+		cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0);
+	}
+	cpmac_write(priv->regs, CPMAC_RX_PTR(0), priv->rx_head->mapping);
+
+	cpmac_write(priv->regs, CPMAC_MBP, MBP_RXSHORT | MBP_RXBCAST |
+		    MBP_RXMCAST);
+	cpmac_write(priv->regs, CPMAC_BUFFER_OFFSET, 0);
+	for (i = 0; i < 8; i++)
+		cpmac_write(priv->regs, CPMAC_MAC_ADDR_LO(i), dev->dev_addr[5]);
+	cpmac_write(priv->regs, CPMAC_MAC_ADDR_MID, dev->dev_addr[4]);
+	cpmac_write(priv->regs, CPMAC_MAC_ADDR_HI, dev->dev_addr[0] |
+		    (dev->dev_addr[1] << 8) | (dev->dev_addr[2] << 16) |
+		    (dev->dev_addr[3] << 24));
+	cpmac_write(priv->regs, CPMAC_MAX_LENGTH, CPMAC_SKB_SIZE);
+	cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff);
+	cpmac_write(priv->regs, CPMAC_UNICAST_ENABLE, 1);
+	cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1);
+	cpmac_write(priv->regs, CPMAC_TX_INT_ENABLE, 0xff);
+	cpmac_write(priv->regs, CPMAC_MAC_INT_ENABLE, 3);
+
+	cpmac_write(priv->regs, CPMAC_RX_CONTROL,
+		    cpmac_read(priv->regs, CPMAC_RX_CONTROL) | 1);
+	cpmac_write(priv->regs, CPMAC_TX_CONTROL,
+		    cpmac_read(priv->regs, CPMAC_TX_CONTROL) | 1);
+	cpmac_write(priv->regs, CPMAC_MAC_CONTROL,
+		    cpmac_read(priv->regs, CPMAC_MAC_CONTROL) | MAC_MII |
+		    MAC_FDX);
+}
+
+static void cpmac_clear_rx(struct net_device *dev)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+	struct cpmac_desc *desc;
+	int i;
+	if (unlikely(!priv->rx_head))
+		return;
+	desc = priv->rx_head;
+	for (i = 0; i < priv->ring_size; i++) {
+		if ((desc->dataflags & CPMAC_OWN) == 0) {
+			if (netif_msg_rx_err(priv) && net_ratelimit())
+				printk(KERN_WARNING "%s: packet dropped\n",
+				       dev->name);
+			if (unlikely(netif_msg_hw(priv)))
+				cpmac_dump_desc(dev, desc);
+			desc->dataflags = CPMAC_OWN;
+			dev->stats.rx_dropped++;
+		}
+		desc = desc->next;
+	}
+}
+
+static void cpmac_clear_tx(struct net_device *dev)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+	int i;
+	if (unlikely(!priv->desc_ring))
+		return;
+	for (i = 0; i < CPMAC_QUEUES; i++)
+		if (priv->desc_ring[i].skb) {
+			dev_kfree_skb_any(priv->desc_ring[i].skb);
+			if (netif_subqueue_stopped(dev, i))
+			    netif_wake_subqueue(dev, i);
+		}
+}
+
+static void cpmac_hw_error(struct work_struct *work)
+{
+	struct cpmac_priv *priv =
+		container_of(work, struct cpmac_priv, reset_work);
+
+	spin_lock(&priv->rx_lock);
+	cpmac_clear_rx(priv->dev);
+	spin_unlock(&priv->rx_lock);
+	cpmac_clear_tx(priv->dev);
+	cpmac_hw_start(priv->dev);
+	netif_start_queue(priv->dev);
+}
+
+static irqreturn_t cpmac_irq(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct cpmac_priv *priv;
+	int queue;
+	u32 status;
+
+	if (!dev)
+		return IRQ_NONE;
+
+	priv = netdev_priv(dev);
+
+	status = cpmac_read(priv->regs, CPMAC_MAC_INT_VECTOR);
+
+	if (unlikely(netif_msg_intr(priv)))
+		printk(KERN_DEBUG "%s: interrupt status: 0x%08x\n", dev->name,
+		       status);
+
+	if (status & MAC_INT_TX)
+		cpmac_end_xmit(dev, (status & 7));
+
+	if (status & MAC_INT_RX) {
+		queue = (status >> 8) & 7;
+		netif_rx_schedule(dev);
+		cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 1 << queue);
+	}
+
+	cpmac_write(priv->regs, CPMAC_MAC_EOI_VECTOR, 0);
+
+	if (unlikely(status & (MAC_INT_HOST | MAC_INT_STATUS))) {
+		if (netif_msg_drv(priv) && net_ratelimit())
+			printk(KERN_ERR "%s: hw error, resetting...\n",
+			       dev->name);
+		netif_stop_queue(dev);
+		cpmac_hw_stop(dev);
+		schedule_work(&priv->reset_work);
+		if (unlikely(netif_msg_hw(priv)))
+			cpmac_dump_regs(dev);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void cpmac_tx_timeout(struct net_device *dev)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+	int i;
+
+	spin_lock(&priv->lock);
+	dev->stats.tx_errors++;
+	spin_unlock(&priv->lock);
+	if (netif_msg_tx_err(priv) && net_ratelimit())
+		printk(KERN_WARNING "%s: transmit timeout\n", dev->name);
+	/* 
+	 * FIXME: waking up random queue is not the best thing to
+	 * do... on the other hand why we got here at all?
+	 */
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	for (i = 0; i < CPMAC_QUEUES; i++)
+		if (priv->desc_ring[i].skb) {
+			dev_kfree_skb_any(priv->desc_ring[i].skb);
+			netif_wake_subqueue(dev, i);
+			break;
+		}
+#else
+	if (priv->desc_ring[0].skb)
+		dev_kfree_skb_any(priv->desc_ring[0].skb);
+	netif_wake_queue(dev);
+#endif
+}
+
+static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+	if (!(netif_running(dev)))
+		return -EINVAL;
+	if (!priv->phy)
+		return -EINVAL;
+	if ((cmd == SIOCGMIIPHY) || (cmd == SIOCGMIIREG) ||
+	    (cmd == SIOCSMIIREG))
+		return phy_mii_ioctl(priv->phy, if_mii(ifr), cmd);
+
+	return -EOPNOTSUPP;
+}
+
+static int cpmac_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	if (priv->phy)
+		return phy_ethtool_gset(priv->phy, cmd);
+
+	return -EINVAL;
+}
+
+static int cpmac_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (priv->phy)
+		return phy_ethtool_sset(priv->phy, cmd);
+
+	return -EINVAL;
+}
+
+static void cpmac_get_ringparam(struct net_device *dev, struct ethtool_ringparam* ring)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	ring->rx_max_pending = 1024;
+	ring->rx_mini_max_pending = 1;
+	ring->rx_jumbo_max_pending = 1;
+	ring->tx_max_pending = 1;
+
+	ring->rx_pending = priv->ring_size;
+	ring->rx_mini_pending = 1;
+	ring->rx_jumbo_pending = 1;
+	ring->tx_pending = 1;
+}
+
+static int cpmac_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ring)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+
+	if (dev->flags && IFF_UP)
+		return -EBUSY;
+	priv->ring_size = ring->rx_pending;
+	return 0;
+}
+
+static void cpmac_get_drvinfo(struct net_device *dev,
+			      struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, "cpmac");
+	strcpy(info->version, CPMAC_VERSION);
+	info->fw_version[0] = '\0';
+	sprintf(info->bus_info, "%s", "cpmac");
+	info->regdump_len = 0;
+}
+
+static const struct ethtool_ops cpmac_ethtool_ops = {
+	.get_settings = cpmac_get_settings,
+	.set_settings = cpmac_set_settings,
+	.get_drvinfo = cpmac_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+	.get_ringparam = cpmac_get_ringparam,
+	.set_ringparam = cpmac_set_ringparam,
+};
+
+static void cpmac_adjust_link(struct net_device *dev)
+{
+	struct cpmac_priv *priv = netdev_priv(dev);
+	int new_state = 0;
+
+	spin_lock(&priv->lock);
+	if (priv->phy->link) {
+		netif_start_queue(dev);
+		if (priv->phy->duplex != priv->oldduplex) {
+			new_state = 1;
+			priv->oldduplex = priv->phy->duplex;
+		}
+
+		if (priv->phy->speed != priv->oldspeed) {
+			new_state = 1;
+			priv->oldspeed = priv->phy->speed;
+		}
+
+		if (!priv->oldlink) {
+			new_state = 1;
+			priv->oldlink = 1;
+			netif_schedule(dev);
+		}
+	} else if (priv->oldlink) {
+		netif_stop_queue(dev);
+		new_state = 1;
+		priv->oldlink = 0;
+		priv->oldspeed = 0;
+		priv->oldduplex = -1;
+	}
+
+	if (new_state && netif_msg_link(priv) && net_ratelimit())
+		phy_print_status(priv->phy);
+
+	spin_unlock(&priv->lock);
+}
+
+static int cpmac_open(struct net_device *dev)
+{
+	int i, size, res;
+	struct cpmac_priv *priv = netdev_priv(dev);
+	struct resource *mem;
+	struct cpmac_desc *desc;
+	struct sk_buff *skb;
+
+	priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link,
+				0, PHY_INTERFACE_MODE_MII);
+	if (IS_ERR(priv->phy)) {
+		if (netif_msg_drv(priv))
+			printk(KERN_ERR "%s: Could not attach to PHY\n",
+			       dev->name);
+		return PTR_ERR(priv->phy);
+	}
+
+	mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs");
+	if (!request_mem_region(mem->start, mem->end - mem->start, dev->name)) {
+		if (netif_msg_drv(priv))
+			printk(KERN_ERR "%s: failed to request registers\n",
+			       dev->name);
+		res = -ENXIO;
+		goto fail_reserve;
+	}
+
+	priv->regs = ioremap(mem->start, mem->end - mem->start);
+	if (!priv->regs) {
+		if (netif_msg_drv(priv))
+			printk(KERN_ERR "%s: failed to remap registers\n",
+			       dev->name);
+		res = -ENXIO;
+		goto fail_remap;
+	}
+
+	size = priv->ring_size + CPMAC_QUEUES;
+	priv->desc_ring = dma_alloc_coherent(&dev->dev,
+					     sizeof(struct cpmac_desc) * size,
+					     &priv->dma_ring,
+					     GFP_KERNEL);
+	if (!priv->desc_ring) {
+		res = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	for (i = 0; i < size; i++)
+		priv->desc_ring[i].mapping = priv->dma_ring + sizeof(*desc) * i;
+
+	priv->rx_head = &priv->desc_ring[CPMAC_QUEUES];
+	for (i = 0, desc = priv->rx_head; i < priv->ring_size; i++, desc++) {
+		skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE);
+		if (unlikely(!skb)) {
+			res = -ENOMEM;
+			goto fail_desc;
+		}
+		skb_reserve(skb, 2);
+		desc->skb = skb;
+		desc->data_mapping = dma_map_single(&dev->dev, skb->data,
+						    CPMAC_SKB_SIZE,
+						    DMA_FROM_DEVICE);
+		desc->hw_data = (u32)desc->data_mapping;
+		desc->buflen = CPMAC_SKB_SIZE;
+		desc->dataflags = CPMAC_OWN;
+		desc->next = &priv->rx_head[(i + 1) % priv->ring_size];
+		desc->hw_next = (u32)desc->next->mapping;
+	}
+
+	if ((res = request_irq(dev->irq, cpmac_irq, IRQF_SHARED,
+			       dev->name, dev))) {
+		if (netif_msg_drv(priv))
+			printk(KERN_ERR "%s: failed to obtain irq\n",
+			       dev->name);
+		goto fail_irq;
+	}
+
+	INIT_WORK(&priv->reset_work, cpmac_hw_error);
+	cpmac_hw_start(dev);
+
+	priv->phy->state = PHY_CHANGELINK;
+	phy_start(priv->phy);
+
+	return 0;
+
+fail_irq:
+fail_desc:
+	for (i = 0; i < priv->ring_size; i++) {
+		if (priv->rx_head[i].skb) {
+			dma_unmap_single(&dev->dev,
+					 priv->rx_head[i].data_mapping,
+					 CPMAC_SKB_SIZE,
+					 DMA_FROM_DEVICE);
+			kfree_skb(priv->rx_head[i].skb);
+		}
+	}
+fail_alloc:
+	kfree(priv->desc_ring);
+	iounmap(priv->regs);
+
+fail_remap:
+	release_mem_region(mem->start, mem->end - mem->start);
+
+fail_reserve:
+	phy_disconnect(priv->phy);
+
+	return res;
+}
+
+static int cpmac_stop(struct net_device *dev)
+{
+	int i;
+	struct cpmac_priv *priv = netdev_priv(dev);
+	struct resource *mem;
+
+	netif_stop_queue(dev);
+
+	cancel_work_sync(&priv->reset_work);
+	phy_stop(priv->phy);
+	phy_disconnect(priv->phy);
+	priv->phy = NULL;
+
+	cpmac_hw_stop(dev);
+
+	for (i = 0; i < 8; i++)
+		cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0);
+	cpmac_write(priv->regs, CPMAC_RX_PTR(0), 0);
+	cpmac_write(priv->regs, CPMAC_MBP, 0);
+
+	free_irq(dev->irq, dev);
+	iounmap(priv->regs);
+	mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs");
+	release_mem_region(mem->start, mem->end - mem->start);
+	priv->rx_head = &priv->desc_ring[CPMAC_QUEUES];
+	for (i = 0; i < priv->ring_size; i++) {
+		if (priv->rx_head[i].skb) {
+			dma_unmap_single(&dev->dev,
+					 priv->rx_head[i].data_mapping,
+					 CPMAC_SKB_SIZE,
+					 DMA_FROM_DEVICE);
+			kfree_skb(priv->rx_head[i].skb);
+		}
+	}
+
+	dma_free_coherent(&dev->dev, sizeof(struct cpmac_desc) *
+			  (CPMAC_QUEUES + priv->ring_size),
+			  priv->desc_ring, priv->dma_ring);
+	return 0;
+}
+
+static int external_switch;
+
+static int __devinit cpmac_probe(struct platform_device *pdev)
+{
+	int rc, phy_id;
+	struct resource *mem;
+	struct cpmac_priv *priv;
+	struct net_device *dev;
+	struct plat_cpmac_data *pdata;
+
+	pdata = pdev->dev.platform_data;
+
+	for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) {
+		if (!(pdata->phy_mask & (1 << phy_id)))
+			continue;
+		if (!cpmac_mii.phy_map[phy_id])
+			continue;
+		break;
+	}
+
+	if (phy_id == PHY_MAX_ADDR) {
+		if (external_switch || dumb_switch)
+			phy_id = 0;
+		else {
+			printk(KERN_ERR "cpmac: no PHY present\n");
+			return -ENODEV;
+		}
+	}
+
+	dev = alloc_etherdev_mq(sizeof(*priv), CPMAC_QUEUES);
+
+	if (!dev) {
+		printk(KERN_ERR "cpmac: Unable to allocate net_device\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, dev);
+	priv = netdev_priv(dev);
+
+	priv->pdev = pdev;
+	mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	if (!mem) {
+		rc = -ENODEV;
+		goto fail;
+	}
+
+	dev->irq = platform_get_irq_byname(pdev, "irq");
+
+	dev->open               = cpmac_open;
+	dev->stop               = cpmac_stop;
+	dev->set_config         = cpmac_config;
+	dev->hard_start_xmit    = cpmac_start_xmit;
+	dev->do_ioctl           = cpmac_ioctl;
+	dev->set_multicast_list = cpmac_set_multicast_list;
+	dev->tx_timeout         = cpmac_tx_timeout;
+	dev->ethtool_ops        = &cpmac_ethtool_ops;
+	dev->poll = cpmac_poll;
+	dev->weight = 64;
+	dev->features |= NETIF_F_MULTI_QUEUE;
+
+	spin_lock_init(&priv->lock);
+	spin_lock_init(&priv->rx_lock);
+	priv->dev = dev;
+	priv->ring_size = 64;
+	priv->msg_enable = netif_msg_init(debug_level, 0xff);
+	memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr));
+	if (phy_id == 31) {
+		snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT,
+			 cpmac_mii.id, phy_id);
+	} else
+		snprintf(priv->phy_name, BUS_ID_SIZE, "fixed@%d:%d", 100, 1);
+
+	if ((rc = register_netdev(dev))) {
+		printk(KERN_ERR "cpmac: error %i registering device %s\n", rc,
+		       dev->name);
+		goto fail;
+	}
+
+	if (netif_msg_probe(priv)) {
+		printk(KERN_INFO
+		       "cpmac: device %s (regs: %p, irq: %d, phy: %s, mac: "
+		       MAC_FMT ")\n", dev->name, (void *)mem->start, dev->irq,
+		       priv->phy_name, MAC_ARG(dev->dev_addr));
+	}
+	return 0;
+
+fail:
+	free_netdev(dev);
+	return rc;
+}
+
+static int __devexit cpmac_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+	unregister_netdev(dev);
+	free_netdev(dev);
+	return 0;
+}
+
+static struct platform_driver cpmac_driver = {
+	.driver.name = "cpmac",
+	.probe = cpmac_probe,
+	.remove = __devexit_p(cpmac_remove),
+};
+
+int __devinit cpmac_init(void)
+{
+	u32 mask;
+	int i, res;
+
+	cpmac_mii.priv = ioremap(AR7_REGS_MDIO, 256);
+
+	if (!cpmac_mii.priv) {
+		printk(KERN_ERR "Can't ioremap mdio registers\n");
+		return -ENXIO;
+	}
+
+#warning FIXME: unhardcode gpio&reset bits
+	ar7_gpio_disable(26);
+	ar7_gpio_disable(27);
+	ar7_device_reset(AR7_RESET_BIT_CPMAC_LO);
+	ar7_device_reset(AR7_RESET_BIT_CPMAC_HI);
+	ar7_device_reset(AR7_RESET_BIT_EPHY);
+
+	cpmac_mii.reset(&cpmac_mii);
+
+	for (i = 0; i < 300000; i++)
+		if ((mask = cpmac_read(cpmac_mii.priv, CPMAC_MDIO_ALIVE)))
+			break;
+		else
+			cpu_relax();
+
+	mask &= 0x7fffffff;
+	if (mask & (mask - 1)) {
+		external_switch = 1;
+		mask = 0;
+	}
+
+	cpmac_mii.phy_mask = ~(mask | 0x80000000);
+
+	res = mdiobus_register(&cpmac_mii);
+	if (res)
+		goto fail_mii;
+
+	res = platform_driver_register(&cpmac_driver);
+	if (res)
+		goto fail_cpmac;
+
+	return 0;
+
+fail_cpmac:
+	mdiobus_unregister(&cpmac_mii);
+
+fail_mii:
+	iounmap(cpmac_mii.priv);
+
+	return res;
+}
+
+void __devexit cpmac_exit(void)
+{
+	platform_driver_unregister(&cpmac_driver);
+	mdiobus_unregister(&cpmac_mii);
+	iounmap(cpmac_mii.priv);
+}
+
+module_init(cpmac_init);
+module_exit(cpmac_exit);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 0db5e6f..558440c 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -168,7 +168,6 @@
 	struct gfar_private *priv = NULL;
 	struct gianfar_platform_data *einfo;
 	struct resource *r;
-	int idx;
 	int err = 0;
 	DECLARE_MAC_BUF(mac);
 
@@ -261,7 +260,9 @@
 	dev->hard_start_xmit = gfar_start_xmit;
 	dev->tx_timeout = gfar_timeout;
 	dev->watchdog_timeo = TX_TIMEOUT;
+#ifdef CONFIG_GFAR_NAPI
 	netif_napi_add(dev, &priv->napi, gfar_poll, GFAR_DEV_WEIGHT);
+#endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = gfar_netpoll;
 #endif
@@ -931,9 +932,14 @@
 /* Returns 0 for success. */
 static int gfar_enet_open(struct net_device *dev)
 {
+#ifdef CONFIG_GFAR_NAPI
+	struct gfar_private *priv = netdev_priv(dev);
+#endif
 	int err;
 
+#ifdef CONFIG_GFAR_NAPI
 	napi_enable(&priv->napi);
+#endif
 
 	/* Initialize a bunch of registers */
 	init_registers(dev);
@@ -943,13 +949,17 @@
 	err = init_phy(dev);
 
 	if(err) {
+#ifdef CONFIG_GFAR_NAPI
 		napi_disable(&priv->napi);
+#endif
 		return err;
 	}
 
 	err = startup_gfar(dev);
 	if (err)
+#ifdef CONFIG_GFAR_NAPI
 		napi_disable(&priv->napi);
+#endif
 
 	netif_start_queue(dev);
 
@@ -1103,7 +1113,9 @@
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
+#ifdef CONFIG_GFAR_NAPI
 	napi_disable(&priv->napi);
+#endif
 
 	stop_gfar(dev);
 
diff --git a/drivers/net/ibm_emac/ibm_emac_mal.c b/drivers/net/ibm_emac/ibm_emac_mal.c
index 4e49e8c..dcd8826 100644
--- a/drivers/net/ibm_emac/ibm_emac_mal.c
+++ b/drivers/net/ibm_emac/ibm_emac_mal.c
@@ -413,7 +413,10 @@
 		       ocpdev->def->index);
 		return -ENOMEM;
 	}
-	mal->dcrbase = maldata->dcr_base;
+
+	/* XXX This only works for native dcr for now */
+	mal->dcrhost = dcr_map(NULL, maldata->dcr_base, 0);
+
 	mal->def = ocpdev->def;
 
 	INIT_LIST_HEAD(&mal->poll_list);
diff --git a/drivers/net/ibm_emac/ibm_emac_mal.h b/drivers/net/ibm_emac/ibm_emac_mal.h
index 8f54d62..b8adbe6 100644
--- a/drivers/net/ibm_emac/ibm_emac_mal.h
+++ b/drivers/net/ibm_emac/ibm_emac_mal.h
@@ -191,7 +191,6 @@
 };
 
 struct ibm_ocp_mal {
-	int			dcrbase;
 	dcr_host_t		dcrhost;
 
 	struct list_head	poll_list;
@@ -209,12 +208,12 @@
 
 static inline u32 get_mal_dcrn(struct ibm_ocp_mal *mal, int reg)
 {
-	return dcr_read(mal->dcrhost, mal->dcrbase + reg);
+	return dcr_read(mal->dcrhost, reg);
 }
 
 static inline void set_mal_dcrn(struct ibm_ocp_mal *mal, int reg, u32 val)
 {
-	dcr_write(mal->dcrhost, mal->dcrbase + reg, val);
+	dcr_write(mal->dcrhost, reg, val);
 }
 
 /* Register MAL devices */
diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c
index 5885411..39f4cb6 100644
--- a/drivers/net/ibm_newemac/mal.c
+++ b/drivers/net/ibm_newemac/mal.c
@@ -461,6 +461,7 @@
 	struct mal_instance *mal;
 	int err = 0, i, bd_size;
 	int index = mal_count++;
+	unsigned int dcr_base;
 	const u32 *prop;
 	u32 cfg;
 
@@ -497,14 +498,14 @@
 	}
 	mal->num_rx_chans = prop[0];
 
-	mal->dcr_base = dcr_resource_start(ofdev->node, 0);
-	if (mal->dcr_base == 0) {
+	dcr_base = dcr_resource_start(ofdev->node, 0);
+	if (dcr_base == 0) {
 		printk(KERN_ERR
 		       "mal%d: can't find DCR resource!\n", index);
 		err = -ENODEV;
 		goto fail;
 	}
-        mal->dcr_host = dcr_map(ofdev->node, mal->dcr_base, 0x100);
+	mal->dcr_host = dcr_map(ofdev->node, dcr_base, 0x100);
 	if (!DCR_MAP_OK(mal->dcr_host)) {
 		printk(KERN_ERR
 		       "mal%d: failed to map DCRs !\n", index);
@@ -626,7 +627,7 @@
  fail2:
 	dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma);
  fail_unmap:
-	dcr_unmap(mal->dcr_host, mal->dcr_base, 0x100);
+	dcr_unmap(mal->dcr_host, 0x100);
  fail:
 	kfree(mal);
 
diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h
index cb1a16d..784edb8 100644
--- a/drivers/net/ibm_newemac/mal.h
+++ b/drivers/net/ibm_newemac/mal.h
@@ -185,7 +185,6 @@
 
 struct mal_instance {
 	int			version;
-	int			dcr_base;
 	dcr_host_t		dcr_host;
 
 	int			num_tx_chans;	/* Number of TX channels */
@@ -213,12 +212,12 @@
 
 static inline u32 get_mal_dcrn(struct mal_instance *mal, int reg)
 {
-	return dcr_read(mal->dcr_host, mal->dcr_base + reg);
+	return dcr_read(mal->dcr_host, reg);
 }
 
 static inline void set_mal_dcrn(struct mal_instance *mal, int reg, u32 val)
 {
-	dcr_write(mal->dcr_host, mal->dcr_base + reg, val);
+	dcr_write(mal->dcr_host, reg, val);
 }
 
 /* Register MAL devices */
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 3e5eca1..a82d8f9 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -840,7 +840,7 @@
 
   /* test 1: SIR filter and back to back */
 
-  for (j = 0; j < (sizeof (bauds) / sizeof (int)); ++j)
+  for (j = 0; j < ARRAY_SIZE(bauds); ++j)
     {
       int fir = (j > 1);
       toshoboe_stopchip (self);
diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c
index d3825c8..5c154fe 100644
--- a/drivers/net/jazzsonic.c
+++ b/drivers/net/jazzsonic.c
@@ -208,7 +208,6 @@
 	struct sonic_local *lp;
 	struct resource *res;
 	int err = 0;
-	int i;
 	DECLARE_MAC_BUF(mac);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index be25aa3..662b8d1 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -265,17 +265,16 @@
 	if (err)
 		goto out_free_netdev;
 
-	err = 0;
 	net->loopback_dev = dev;
+	return 0;
 
-out:
-	if (err)
-		panic("loopback: Failed to register netdevice: %d\n", err);
-	return err;
 
 out_free_netdev:
 	free_netdev(dev);
-	goto out;
+out:
+	if (net == &init_net)
+		panic("loopback: Failed to register netdevice: %d\n", err);
+	return err;
 }
 
 static __net_exit void loopback_net_exit(struct net *net)
diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c
index d593175..37707a0 100644
--- a/drivers/net/mipsnet.c
+++ b/drivers/net/mipsnet.c
@@ -7,12 +7,12 @@
 #define DEBUG
 
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
-#include <asm/io.h>
 #include <asm/mips-boards/simint.h>
 
 #include "mipsnet.h"		/* actual device IO mapping */
@@ -33,9 +33,8 @@
 	if (available_len < len)
 		return -EFAULT;
 
-	for (; len > 0; len--, kdata++) {
+	for (; len > 0; len--, kdata++)
 		*kdata = inb(mipsnet_reg_address(dev, rxDataBuffer));
-	}
 
 	return inl(mipsnet_reg_address(dev, rxDataCount));
 }
@@ -47,16 +46,15 @@
 	char *buf_ptr = skb->data;
 
 	pr_debug("%s: %s(): telling MIPSNET txDataCount(%d)\n",
-	         dev->name, __FUNCTION__, skb->len);
+		 dev->name, __FUNCTION__, skb->len);
 
 	outl(skb->len, mipsnet_reg_address(dev, txDataCount));
 
 	pr_debug("%s: %s(): sending data to MIPSNET txDataBuffer(%d)\n",
-	         dev->name, __FUNCTION__, skb->len);
+		 dev->name, __FUNCTION__, skb->len);
 
-	for (; count_to_go; buf_ptr++, count_to_go--) {
+	for (; count_to_go; buf_ptr++, count_to_go--)
 		outb(*buf_ptr, mipsnet_reg_address(dev, txDataBuffer));
-	}
 
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
@@ -67,7 +65,7 @@
 static int mipsnet_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	pr_debug("%s:%s(): transmitting %d bytes\n",
-	         dev->name, __FUNCTION__, skb->len);
+		 dev->name, __FUNCTION__, skb->len);
 
 	/* Only one packet at a time. Once TXDONE interrupt is serviced, the
 	 * queue will be restarted.
@@ -83,7 +81,8 @@
 	struct sk_buff *skb;
 	size_t len = count;
 
-	if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) {
+	skb = alloc_skb(len + 2, GFP_KERNEL);
+	if (!skb) {
 		dev->stats.rx_dropped++;
 		return -ENOMEM;
 	}
@@ -96,7 +95,7 @@
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	pr_debug("%s:%s(): pushing RXed data to kernel\n",
-	         dev->name, __FUNCTION__);
+		 dev->name, __FUNCTION__);
 	netif_rx(skb);
 
 	dev->stats.rx_packets++;
@@ -114,42 +113,44 @@
 
 	if (irq == dev->irq) {
 		pr_debug("%s:%s(): irq %d for device\n",
-		         dev->name, __FUNCTION__, irq);
+			 dev->name, __FUNCTION__, irq);
 
 		retval = IRQ_HANDLED;
 
 		interruptFlags =
 		    inl(mipsnet_reg_address(dev, interruptControl));
 		pr_debug("%s:%s(): intCtl=0x%016llx\n", dev->name,
-		         __FUNCTION__, interruptFlags);
+			 __FUNCTION__, interruptFlags);
 
 		if (interruptFlags & MIPSNET_INTCTL_TXDONE) {
 			pr_debug("%s:%s(): got TXDone\n",
-			         dev->name, __FUNCTION__);
+				 dev->name, __FUNCTION__);
 			outl(MIPSNET_INTCTL_TXDONE,
 			     mipsnet_reg_address(dev, interruptControl));
-			// only one packet at a time, we are done.
+			/* only one packet at a time, we are done. */
 			netif_wake_queue(dev);
 		} else if (interruptFlags & MIPSNET_INTCTL_RXDONE) {
 			pr_debug("%s:%s(): got RX data\n",
-			         dev->name, __FUNCTION__);
+				 dev->name, __FUNCTION__);
 			mipsnet_get_fromdev(dev,
-			            inl(mipsnet_reg_address(dev, rxDataCount)));
+				    inl(mipsnet_reg_address(dev, rxDataCount)));
 			pr_debug("%s:%s(): clearing RX int\n",
-			         dev->name, __FUNCTION__);
+				 dev->name, __FUNCTION__);
 			outl(MIPSNET_INTCTL_RXDONE,
 			     mipsnet_reg_address(dev, interruptControl));
 
 		} else if (interruptFlags & MIPSNET_INTCTL_TESTBIT) {
 			pr_debug("%s:%s(): got test interrupt\n",
-			         dev->name, __FUNCTION__);
-			// TESTBIT is cleared on read.
-			//    And takes effect after a write with 0
+				 dev->name, __FUNCTION__);
+			/*
+			 * TESTBIT is cleared on read.
+			 * And takes effect after a write with 0
+			 */
 			outl(0, mipsnet_reg_address(dev, interruptControl));
 		} else {
 			pr_debug("%s:%s(): no valid fags 0x%016llx\n",
-			         dev->name, __FUNCTION__, interruptFlags);
-			// Maybe shared IRQ, just ignore, no clearing.
+				 dev->name, __FUNCTION__, interruptFlags);
+			/* Maybe shared IRQ, just ignore, no clearing. */
 			retval = IRQ_NONE;
 		}
 
@@ -159,7 +160,7 @@
 		retval = IRQ_NONE;
 	}
 	return retval;
-}				//mipsnet_interrupt()
+}
 
 static int mipsnet_open(struct net_device *dev)
 {
@@ -171,18 +172,18 @@
 
 	if (err) {
 		pr_debug("%s: %s(): can't get irq %d\n",
-		         dev->name, __FUNCTION__, dev->irq);
+			 dev->name, __FUNCTION__, dev->irq);
 		release_region(dev->base_addr, MIPSNET_IO_EXTENT);
 		return err;
 	}
 
 	pr_debug("%s: %s(): got IO region at 0x%04lx and irq %d for dev.\n",
-	         dev->name, __FUNCTION__, dev->base_addr, dev->irq);
+		 dev->name, __FUNCTION__, dev->base_addr, dev->irq);
 
 
 	netif_start_queue(dev);
 
-	// test interrupt handler
+	/* test interrupt handler */
 	outl(MIPSNET_INTCTL_TESTBIT,
 	     mipsnet_reg_address(dev, interruptControl));
 
@@ -199,8 +200,6 @@
 
 static void mipsnet_set_mclist(struct net_device *dev)
 {
-	// we don't do anything
-	return;
 }
 
 static int __init mipsnet_probe(struct device *dev)
@@ -226,13 +225,13 @@
 	 */
 	netdev->base_addr = 0x4200;
 	netdev->irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB0 +
-	              inl(mipsnet_reg_address(netdev, interruptInfo));
+		      inl(mipsnet_reg_address(netdev, interruptInfo));
 
-	// Get the io region now, get irq on open()
+	/* Get the io region now, get irq on open() */
 	if (!request_region(netdev->base_addr, MIPSNET_IO_EXTENT, "mipsnet")) {
 		pr_debug("%s: %s(): IO region {start: 0x%04lux, len: %d} "
-		         "for dev is not availble.\n", netdev->name,
-		         __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT);
+			 "for dev is not availble.\n", netdev->name,
+			 __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT);
 		err = -EBUSY;
 		goto out_free_netdev;
 	}
diff --git a/drivers/net/mipsnet.h b/drivers/net/mipsnet.h
index 026c732..0132c67 100644
--- a/drivers/net/mipsnet.h
+++ b/drivers/net/mipsnet.h
@@ -9,32 +9,34 @@
 /*
  *  Id of this Net device, as seen by the core.
  */
-#define MIPS_NET_DEV_ID ((uint64_t)           \
-	                     ((uint64_t)'M'<< 0)| \
-	                     ((uint64_t)'I'<< 8)| \
-	                     ((uint64_t)'P'<<16)| \
-	                     ((uint64_t)'S'<<24)| \
-	                     ((uint64_t)'N'<<32)| \
-	                     ((uint64_t)'E'<<40)| \
-	                     ((uint64_t)'T'<<48)| \
-	                     ((uint64_t)'0'<<56))
+#define MIPS_NET_DEV_ID ((uint64_t)	   \
+			     ((uint64_t) 'M' <<  0)| \
+			     ((uint64_t) 'I' <<  8)| \
+			     ((uint64_t) 'P' << 16)| \
+			     ((uint64_t) 'S' << 24)| \
+			     ((uint64_t) 'N' << 32)| \
+			     ((uint64_t) 'E' << 40)| \
+			     ((uint64_t) 'T' << 48)| \
+			     ((uint64_t) '0' << 56))
 
 /*
  * Net status/control block as seen by sw in the core.
  * (Why not use bit fields? can't be bothered with cross-platform struct
  *  packing.)
  */
-typedef struct _net_control_block {
-	/// dev info for probing
-	///  reads as MIPSNET%d where %d is some form of version
-	uint64_t devId;		/*0x00 */
+struct net_control_block {
+	/*
+	 * dev info for probing
+	 * reads as MIPSNET%d where %d is some form of version
+	 */
+	uint64_t devId;		/* 0x00 */
 
 	/*
 	 * read only busy flag.
 	 * Set and cleared by the Net Device to indicate that an rx or a tx
 	 * is in progress.
 	 */
-	uint32_t busy;		/*0x08 */
+	uint32_t busy;		/* 0x08 */
 
 	/*
 	 * Set by the Net Device.
@@ -43,16 +45,16 @@
 	 * rxDataBuffer.  The value will decrease till 0 until all the data
 	 * from rxDataBuffer has been read.
 	 */
-	uint32_t rxDataCount;	/*0x0c */
+	uint32_t rxDataCount;	/* 0x0c */
 #define MIPSNET_MAX_RXTX_DATACOUNT (1<<16)
 
 	/*
-	 * Settable from the MIPS core, cleared by the Net Device.
-	 * The core should set the number of bytes it wants to send,
-	 *   then it should write those bytes of data to txDataBuffer.
-	 * The device will clear txDataCount has been processed (not necessarily sent).
+	 * Settable from the MIPS core, cleared by the Net Device.  The core
+	 * should set the number of bytes it wants to send, then it should
+	 * write those bytes of data to txDataBuffer.  The device will clear
+	 * txDataCount has been processed (not necessarily sent).
 	 */
-	uint32_t txDataCount;	/*0x10 */
+	uint32_t txDataCount;	/* 0x10 */
 
 	/*
 	 * Interrupt control
@@ -69,39 +71,42 @@
 	 *    To clear the test interrupt, write 0 to this register.
 	 */
 	uint32_t interruptControl;	/*0x14 */
-#define MIPSNET_INTCTL_TXDONE     ((uint32_t)(1<< 0))
-#define MIPSNET_INTCTL_RXDONE     ((uint32_t)(1<< 1))
-#define MIPSNET_INTCTL_TESTBIT    ((uint32_t)(1<<31))
-#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE|MIPSNET_INTCTL_RXDONE|MIPSNET_INTCTL_TESTBIT)
+#define MIPSNET_INTCTL_TXDONE     ((uint32_t)(1 <<  0))
+#define MIPSNET_INTCTL_RXDONE     ((uint32_t)(1 <<  1))
+#define MIPSNET_INTCTL_TESTBIT    ((uint32_t)(1 << 31))
+#define MIPSNET_INTCTL_ALLSOURCES	(MIPSNET_INTCTL_TXDONE | \
+					 MIPSNET_INTCTL_RXDONE | \
+					 MIPSNET_INTCTL_TESTBIT)
 
 	/*
-	 * Readonly core-specific interrupt info for the device to signal the core.
-	 * The meaning of the contents of this field might change.
+	 * Readonly core-specific interrupt info for the device to signal the
+	 * core.  The meaning of the contents of this field might change.
+	 *
+	 * TODO: the whole memIntf interrupt scheme is messy: the device should
+	 *       have no control what so ever of what VPE/register set is being
+	 *       used.  The MemIntf should only expose interrupt lines, and
+	 *       something in the config should be responsible for the
+	 *       line<->core/vpe bindings.
 	 */
-	/*###\todo: the whole memIntf interrupt scheme is messy: the device should have
-	 *  no control what so ever of what VPE/register set is being used.
-	 *  The MemIntf should only expose interrupt lines, and something in the
-	 *  config should be responsible for the line<->core/vpe bindings.
-	 */
-	uint32_t interruptInfo;	/*0x18 */
+	uint32_t interruptInfo;	/* 0x18 */
 
 	/*
 	 *  This is where the received data is read out.
 	 *  There is more data to read until rxDataReady is 0.
 	 *  Only 1 byte at this regs offset is used.
 	 */
-	uint32_t rxDataBuffer;	/*0x1c */
+	uint32_t rxDataBuffer;	/* 0x1c */
 
 	/*
-	 * This is where the data to transmit is written.
-	 * Data should be written for the amount specified in the txDataCount register.
-	 *  Only 1 byte at this regs offset is used.
+	 * This is where the data to transmit is written.  Data should be
+	 * written for the amount specified in the txDataCount register.  Only
+	 * 1 byte at this regs offset is used.
 	 */
-	uint32_t txDataBuffer;	/*0x20 */
-} MIPS_T_NetControl;
+	uint32_t txDataBuffer;	/* 0x20 */
+};
 
 #define MIPSNET_IO_EXTENT 0x40	/* being generous */
 
-#define field_offset(field) ((int)&((MIPS_T_NetControl*)(0))->field)
+#define field_offset(field) (offsetof(struct net_control_block, field))
 
 #endif /* __MIPSNET_H */
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e8afa10..64c8151 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -75,7 +75,7 @@
 #include "myri10ge_mcp.h"
 #include "myri10ge_mcp_gen_header.h"
 
-#define MYRI10GE_VERSION_STR "1.3.2-1.269"
+#define MYRI10GE_VERSION_STR "1.3.2-1.287"
 
 MODULE_DESCRIPTION("Myricom 10G driver (10GbE)");
 MODULE_AUTHOR("Maintainer: help@myri.com");
@@ -214,6 +214,8 @@
 	unsigned long serial_number;
 	int vendor_specific_offset;
 	int fw_multicast_support;
+	unsigned long features;
+	u32 max_tso6;
 	u32 read_dma;
 	u32 write_dma;
 	u32 read_write_dma;
@@ -311,6 +313,7 @@
 #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8)
 
 static void myri10ge_set_multicast_list(struct net_device *dev);
+static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev);
 
 static inline void put_be32(__be32 val, __be32 __iomem * p)
 {
@@ -612,6 +615,7 @@
 	__be32 buf[16];
 	u32 dma_low, dma_high, size;
 	int status, i;
+	struct myri10ge_cmd cmd;
 
 	size = 0;
 	status = myri10ge_load_hotplug_firmware(mgp, &size);
@@ -688,6 +692,14 @@
 	dev_info(&mgp->pdev->dev, "handoff confirmed\n");
 	myri10ge_dummy_rdma(mgp, 1);
 
+	/* probe for IPv6 TSO support */
+	mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
+	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
+				   &cmd, 0);
+	if (status == 0) {
+		mgp->max_tso6 = cmd.data0;
+		mgp->features |= NETIF_F_TSO6;
+	}
 	return 0;
 }
 
@@ -1047,7 +1059,8 @@
 
 	hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN;
 
-	/* allocate an skb to attach the page(s) to. */
+	/* allocate an skb to attach the page(s) to. This is done
+	 * after trying LRO, so as to avoid skb allocation overheads */
 
 	skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16);
 	if (unlikely(skb == NULL)) {
@@ -1217,7 +1230,8 @@
 
 static int myri10ge_poll(struct napi_struct *napi, int budget)
 {
-	struct myri10ge_priv *mgp = container_of(napi, struct myri10ge_priv, napi);
+	struct myri10ge_priv *mgp =
+	    container_of(napi, struct myri10ge_priv, napi);
 	struct net_device *netdev = mgp->dev;
 	struct myri10ge_rx_done *rx_done = &mgp->rx_done;
 	int work_done;
@@ -1382,6 +1396,18 @@
 	return 0;
 }
 
+static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled)
+{
+	struct myri10ge_priv *mgp = netdev_priv(netdev);
+	unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO);
+
+	if (tso_enabled)
+		netdev->features |= flags;
+	else
+		netdev->features &= ~flags;
+	return 0;
+}
+
 static const char myri10ge_gstrings_stats[][ETH_GSTRING_LEN] = {
 	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
 	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
@@ -1506,7 +1532,7 @@
 	.set_rx_csum = myri10ge_set_rx_csum,
 	.set_tx_csum = ethtool_op_set_tx_hw_csum,
 	.set_sg = ethtool_op_set_sg,
-	.set_tso = ethtool_op_set_tso,
+	.set_tso = myri10ge_set_tso,
 	.get_link = ethtool_op_get_link,
 	.get_strings = myri10ge_get_strings,
 	.get_sset_count = myri10ge_get_sset_count,
@@ -2164,7 +2190,8 @@
 		pseudo_hdr_offset = cksum_offset + skb->csum_offset;
 		/* If the headers are excessively large, then we must
 		 * fall back to a software checksum */
-		if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) {
+		if (unlikely(!mss && (cksum_offset > 255 ||
+				      pseudo_hdr_offset > 127))) {
 			if (skb_checksum_help(skb))
 				goto drop;
 			cksum_offset = 0;
@@ -2184,9 +2211,18 @@
 		/* negative cum_len signifies to the
 		 * send loop that we are still in the
 		 * header portion of the TSO packet.
-		 * TSO header must be at most 134 bytes long */
+		 * TSO header can be at most 1KB long */
 		cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
 
+		/* for IPv6 TSO, the checksum offset stores the
+		 * TCP header length, to save the firmware from
+		 * the need to parse the headers */
+		if (skb_is_gso_v6(skb)) {
+			cksum_offset = tcp_hdrlen(skb);
+			/* Can only handle headers <= max_tso6 long */
+			if (unlikely(-cum_len > mgp->max_tso6))
+				return myri10ge_sw_tso(skb, dev);
+		}
 		/* for TSO, pseudo_hdr_offset holds mss.
 		 * The firmware figures out where to put
 		 * the checksum by parsing the header. */
@@ -2301,10 +2337,12 @@
 			req++;
 			count++;
 			rdma_count++;
-			if (unlikely(cksum_offset > seglen))
-				cksum_offset -= seglen;
-			else
-				cksum_offset = 0;
+			if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) {
+				if (unlikely(cksum_offset > seglen))
+					cksum_offset -= seglen;
+				else
+					cksum_offset = 0;
+			}
 		}
 		if (frag_idx == frag_cnt)
 			break;
@@ -2387,6 +2425,41 @@
 
 }
 
+static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev)
+{
+	struct sk_buff *segs, *curr;
+	struct myri10ge_priv *mgp = dev->priv;
+	int status;
+
+	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6);
+	if (unlikely(IS_ERR(segs)))
+		goto drop;
+
+	while (segs) {
+		curr = segs;
+		segs = segs->next;
+		curr->next = NULL;
+		status = myri10ge_xmit(curr, dev);
+		if (status != 0) {
+			dev_kfree_skb_any(curr);
+			if (segs != NULL) {
+				curr = segs;
+				segs = segs->next;
+				curr->next = NULL;
+				dev_kfree_skb_any(segs);
+			}
+			goto drop;
+		}
+	}
+	dev_kfree_skb_any(skb);
+	return 0;
+
+drop:
+	dev_kfree_skb_any(skb);
+	mgp->stats.tx_dropped += 1;
+	return 0;
+}
+
 static struct net_device_stats *myri10ge_get_stats(struct net_device *dev)
 {
 	struct myri10ge_priv *mgp = netdev_priv(dev);
@@ -2706,7 +2779,6 @@
 }
 
 #ifdef CONFIG_PM
-
 static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct myri10ge_priv *mgp;
@@ -2787,7 +2859,6 @@
 	return -EIO;
 
 }
-
 #endif				/* CONFIG_PM */
 
 static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp)
@@ -2954,8 +3025,7 @@
 
 	mgp = netdev_priv(netdev);
 	mgp->dev = netdev;
-	netif_napi_add(netdev, &mgp->napi,
-		       myri10ge_poll, myri10ge_napi_weight);
+	netif_napi_add(netdev, &mgp->napi, myri10ge_poll, myri10ge_napi_weight);
 	mgp->pdev = pdev;
 	mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
 	mgp->pause = myri10ge_flow_control;
@@ -3077,7 +3147,7 @@
 	netdev->change_mtu = myri10ge_change_mtu;
 	netdev->set_multicast_list = myri10ge_set_multicast_list;
 	netdev->set_mac_address = myri10ge_set_mac_address;
-	netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
+	netdev->features = mgp->features;
 	if (dac_enabled)
 		netdev->features |= NETIF_F_HIGHDMA;
 
diff --git a/drivers/net/myri10ge/myri10ge_mcp.h b/drivers/net/myri10ge/myri10ge_mcp.h
index a1d2a22..58e5717 100644
--- a/drivers/net/myri10ge/myri10ge_mcp.h
+++ b/drivers/net/myri10ge/myri10ge_mcp.h
@@ -10,7 +10,7 @@
 	__be32 low;
 };
 
-/* 4 Bytes */
+/* 4 Bytes.  8 Bytes for NDIS drivers. */
 struct mcp_slot {
 	__sum16 checksum;
 	__be16 length;
@@ -205,8 +205,87 @@
 	/* same than DMA_TEST (same args) but abort with UNALIGNED on unaligned
 	 * chipset */
 
-	MXGEFW_CMD_UNALIGNED_STATUS
-	    /* return data = boolean, true if the chipset is known to be unaligned */
+	MXGEFW_CMD_UNALIGNED_STATUS,
+	/* return data = boolean, true if the chipset is known to be unaligned */
+
+	MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
+	/* data0 = number of big buffers to use.  It must be 0 or a power of 2.
+	 * 0 indicates that the NIC consumes as many buffers as they are required
+	 * for packet. This is the default behavior.
+	 * A power of 2 number indicates that the NIC always uses the specified
+	 * number of buffers for each big receive packet.
+	 * It is up to the driver to ensure that this value is big enough for
+	 * the NIC to be able to receive maximum-sized packets.
+	 */
+
+	MXGEFW_CMD_GET_MAX_RSS_QUEUES,
+	MXGEFW_CMD_ENABLE_RSS_QUEUES,
+	/* data0 = number of slices n (0, 1, ..., n-1) to enable
+	 * data1 = interrupt mode. 0=share one INTx/MSI, 1=use one MSI-X per queue.
+	 * If all queues share one interrupt, the driver must have set
+	 * RSS_SHARED_INTERRUPT_DMA before enabling queues.
+	 */
+	MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET,
+	MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA,
+	/* data0, data1 = bus address lsw, msw */
+	MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
+	/* get the offset of the indirection table */
+	MXGEFW_CMD_SET_RSS_TABLE_SIZE,
+	/* set the size of the indirection table */
+	MXGEFW_CMD_GET_RSS_KEY_OFFSET,
+	/* get the offset of the secret key */
+	MXGEFW_CMD_RSS_KEY_UPDATED,
+	/* tell nic that the secret key's been updated */
+	MXGEFW_CMD_SET_RSS_ENABLE,
+	/* data0 = enable/disable rss
+	 * 0: disable rss.  nic does not distribute receive packets.
+	 * 1: enable rss.  nic distributes receive packets among queues.
+	 * data1 = hash type
+	 * 1: IPV4
+	 * 2: TCP_IPV4
+	 * 3: IPV4 | TCP_IPV4
+	 */
+
+	MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
+	/* Return data = the max. size of the entire headers of a IPv6 TSO packet.
+	 * If the header size of a IPv6 TSO packet is larger than the specified
+	 * value, then the driver must not use TSO.
+	 * This size restriction only applies to IPv6 TSO.
+	 * For IPv4 TSO, the maximum size of the headers is fixed, and the NIC
+	 * always has enough header buffer to store maximum-sized headers.
+	 */
+
+	MXGEFW_CMD_SET_TSO_MODE,
+	/* data0 = TSO mode.
+	 * 0: Linux/FreeBSD style (NIC default)
+	 * 1: NDIS/NetBSD style
+	 */
+
+	MXGEFW_CMD_MDIO_READ,
+	/* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */
+	MXGEFW_CMD_MDIO_WRITE,
+	/* data0 = dev_addr,  data1 = register/addr, data2 = value  */
+
+	MXGEFW_CMD_XFP_I2C_READ,
+	/* Starts to get a fresh copy of one byte or of the whole xfp i2c table, the
+	 * obtained data is cached inside the xaui-xfi chip :
+	 *   data0 : "all" flag : 0 => get one byte, 1=> get 256 bytes,
+	 *   data1 : if (data0 == 0): index of byte to refresh [ not used otherwise ]
+	 * The operation might take ~1ms for a single byte or ~65ms when refreshing all 256 bytes
+	 * During the i2c operation,  MXGEFW_CMD_XFP_I2C_READ or MXGEFW_CMD_XFP_BYTE attempts
+	 *  will return MXGEFW_CMD_ERROR_BUSY
+	 */
+	MXGEFW_CMD_XFP_BYTE,
+	/* Return the last obtained copy of a given byte in the xfp i2c table
+	 * (copy cached during the last relevant MXGEFW_CMD_XFP_I2C_READ)
+	 *   data0 : index of the desired table entry
+	 *  Return data = the byte stored at the requested index in the table
+	 */
+
+	MXGEFW_CMD_GET_VPUMP_OFFSET,
+	/* Return data = NIC memory offset of mcp_vpump_public_global */
+	MXGEFW_CMD_RESET_VPUMP,
+	/* Resets the VPUMP state */
 };
 
 enum myri10ge_mcp_cmd_status {
@@ -220,7 +299,10 @@
 	MXGEFW_CMD_ERROR_BAD_PORT,
 	MXGEFW_CMD_ERROR_RESOURCES,
 	MXGEFW_CMD_ERROR_MULTICAST,
-	MXGEFW_CMD_ERROR_UNALIGNED
+	MXGEFW_CMD_ERROR_UNALIGNED,
+	MXGEFW_CMD_ERROR_NO_MDIO,
+	MXGEFW_CMD_ERROR_XFP_FAILURE,
+	MXGEFW_CMD_ERROR_XFP_ABSENT
 };
 
 #define MXGEFW_OLD_IRQ_DATA_LEN 40
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 527f9dc..50e1ec6 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -1576,7 +1576,7 @@
 
 	/* Set the timer to check for link beat. */
 	init_timer(&np->timer);
-	np->timer.expires = jiffies + NATSEMI_TIMER_FREQ;
+	np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ);
 	np->timer.data = (unsigned long)dev;
 	np->timer.function = &netdev_timer; /* timer handler */
 	add_timer(&np->timer);
@@ -1856,7 +1856,11 @@
 			next_tick = 1;
 		}
 	}
-	mod_timer(&np->timer, jiffies + next_tick);
+
+	if (next_tick > 1)
+		mod_timer(&np->timer, round_jiffies(jiffies + next_tick));
+	else
+		mod_timer(&np->timer, jiffies + next_tick);
 }
 
 static void dump_ring(struct net_device *dev)
@@ -3310,13 +3314,19 @@
 {
 	struct net_device *dev = pci_get_drvdata (pdev);
 	struct netdev_private *np = netdev_priv(dev);
+	int ret = 0;
 
 	rtnl_lock();
 	if (netif_device_present(dev))
 		goto out;
 	if (netif_running(dev)) {
 		BUG_ON(!np->hands_off);
-		pci_enable_device(pdev);
+		ret = pci_enable_device(pdev);
+		if (ret < 0) {
+			dev_err(&pdev->dev,
+				"pci_enable_device() failed: %d\n", ret);
+			goto out;
+		}
 	/*	pci_power_on(pdev); */
 
 		napi_enable(&np->napi);
@@ -3331,12 +3341,12 @@
 		spin_unlock_irq(&np->lock);
 		enable_irq(dev->irq);
 
-		mod_timer(&np->timer, jiffies + 1*HZ);
+		mod_timer(&np->timer, round_jiffies(jiffies + 1*HZ));
 	}
 	netif_device_attach(dev);
 out:
 	rtnl_unlock();
-	return 0;
+	return ret;
 }
 
 #endif /* CONFIG_PM */
diff --git a/drivers/net/ne-h8300.c b/drivers/net/ne-h8300.c
index 368f256..fbc7531 100644
--- a/drivers/net/ne-h8300.c
+++ b/drivers/net/ne-h8300.c
@@ -93,7 +93,7 @@
 	bus_width = *(volatile unsigned char *)ABWCR;
 	bus_width &= 1 << ((base_addr >> 21) & 7);
 
-	for (i = 0; i < sizeof(reg_offset) / sizeof(u32); i++)
+	for (i = 0; i < ARRAY_SIZE(reg_offset); i++)
 		if (bus_width == 0)
 			reg_offset[i] = i * 2 + 1;
 		else
@@ -115,7 +115,7 @@
 
 static inline int init_dev(struct net_device *dev)
 {
-	if (h8300_ne_count < (sizeof(h8300_ne_base) / sizeof(unsigned long))) {
+	if (h8300_ne_count < ARRAY_SIZE(h8300_ne_base)) {
 		dev->base_addr = h8300_ne_base[h8300_ne_count];
 		dev->irq       = h8300_ne_irq[h8300_ne_count];
 		h8300_ne_count++;
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 43bfe7e..ed1f9bb 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6123,19 +6123,19 @@
 	val = nr64(ESPC_PHY_TYPE);
 	switch (np->port) {
 	case 0:
-		val = (val & ESPC_PHY_TYPE_PORT0) >>
+		val8 = (val & ESPC_PHY_TYPE_PORT0) >>
 			ESPC_PHY_TYPE_PORT0_SHIFT;
 		break;
 	case 1:
-		val = (val & ESPC_PHY_TYPE_PORT1) >>
+		val8 = (val & ESPC_PHY_TYPE_PORT1) >>
 			ESPC_PHY_TYPE_PORT1_SHIFT;
 		break;
 	case 2:
-		val = (val & ESPC_PHY_TYPE_PORT2) >>
+		val8 = (val & ESPC_PHY_TYPE_PORT2) >>
 			ESPC_PHY_TYPE_PORT2_SHIFT;
 		break;
 	case 3:
-		val = (val & ESPC_PHY_TYPE_PORT3) >>
+		val8 = (val & ESPC_PHY_TYPE_PORT3) >>
 			ESPC_PHY_TYPE_PORT3_SHIFT;
 		break;
 	default:
@@ -6143,9 +6143,9 @@
 			np->port);
 		return -EINVAL;
 	}
-	niudbg(PROBE, "SPROM: PHY type %llx\n", (unsigned long long) val);
+	niudbg(PROBE, "SPROM: PHY type %x\n", val8);
 
-	switch (val) {
+	switch (val8) {
 	case ESPC_PHY_TYPE_1G_COPPER:
 		/* 1G copper, MII */
 		np->flags &= ~(NIU_FLAGS_FIBER |
@@ -6175,8 +6175,7 @@
 		break;
 
 	default:
-		dev_err(np->device, PFX "Bogus SPROM phy type %llu\n",
-			(unsigned long long) val);
+		dev_err(np->device, PFX "Bogus SPROM phy type %u\n", val8);
 		return -EINVAL;
 	}
 
@@ -6213,7 +6212,7 @@
 	val = nr64(ESPC_MOD_STR_LEN);
 	niudbg(PROBE, "SPROM: MOD_STR_LEN[%llu]\n",
 	       (unsigned long long) val);
-	if (val > 8 * 4)
+	if (val >= 8 * 4)
 		return -EINVAL;
 
 	for (i = 0; i < val; i += 4) {
@@ -6229,7 +6228,7 @@
 	val = nr64(ESPC_BD_MOD_STR_LEN);
 	niudbg(PROBE, "SPROM: BD_MOD_STR_LEN[%llu]\n",
 	       (unsigned long long) val);
-	if (val > 4 * 4)
+	if (val >= 4 * 4)
 		return -EINVAL;
 
 	for (i = 0; i < val; i += 4) {
diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c
index 14361e8..c65199d 100644
--- a/drivers/net/saa9730.c
+++ b/drivers/net/saa9730.c
@@ -97,13 +97,16 @@
 	       &lp->evm_saa9730_regs->InterruptBlock1);
 }
 
-static void __attribute_used__ show_saa9730_regs(struct lan_saa9730_private *lp)
+static void __used show_saa9730_regs(struct net_device *dev)
 {
+	struct lan_saa9730_private *lp = netdev_priv(dev);
 	int i, j;
+
 	printk("TxmBufferA = %p\n", lp->TxmBuffer[0][0]);
 	printk("TxmBufferB = %p\n", lp->TxmBuffer[1][0]);
 	printk("RcvBufferA = %p\n", lp->RcvBuffer[0][0]);
 	printk("RcvBufferB = %p\n", lp->RcvBuffer[1][0]);
+
 	for (i = 0; i < LAN_SAA9730_BUFFERS; i++) {
 		for (j = 0; j < LAN_SAA9730_TXM_Q_SIZE; j++) {
 			printk("TxmBuffer[%d][%d] = %x\n", i, j,
@@ -146,11 +149,13 @@
 	       readl(&lp->lan_saa9730_regs->RxCtl));
 	printk("lp->lan_saa9730_regs->RxStatus = %x\n",
 	       readl(&lp->lan_saa9730_regs->RxStatus));
+
 	for (i = 0; i < LAN_SAA9730_CAM_DWORDS; i++) {
 		writel(i, &lp->lan_saa9730_regs->CamAddress);
 		printk("lp->lan_saa9730_regs->CamData = %x\n",
 		       readl(&lp->lan_saa9730_regs->CamData));
 	}
+
 	printk("dev->stats.tx_packets = %lx\n", dev->stats.tx_packets);
 	printk("dev->stats.tx_errors = %lx\n", dev->stats.tx_errors);
 	printk("dev->stats.tx_aborted_errors = %lx\n",
@@ -855,7 +860,7 @@
 	/* Transmitter timeout, serious problems */
 	dev->stats.tx_errors++;
 	printk("%s: transmit timed out, reset\n", dev->name);
-	/*show_saa9730_regs(lp); */
+	/*show_saa9730_regs(dev); */
 	lan_saa9730_restart(lp);
 
 	dev->trans_start = jiffies;
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index a679f43..8038f28 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1461,7 +1461,6 @@
 	}
 	return IRQ_NONE;
 #else
-	struct tc35815_local *lp = dev->priv;
 	int handled;
 	u32 status;
 
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 8d04654..4e1b84e 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -1906,7 +1906,7 @@
 
     /************** pci *****************/
 	if ((err = pci_enable_device(pdev)))	/* it trigers interrupt, dunno why. */
-		RET(err);			/* it's not a problem though */
+		goto err_pci;			/* it's not a problem though */
 
 	if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) &&
 	    !(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) {
@@ -2076,6 +2076,7 @@
 	pci_release_regions(pdev);
 err_dma:
 	pci_disable_device(pdev);
+err_pci:
 	vfree(nic);
 
 	RET(err);
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 30b1cca..014dc2c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,8 +64,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.83"
-#define DRV_MODULE_RELDATE	"October 10, 2007"
+#define DRV_MODULE_VERSION	"3.84"
+#define DRV_MODULE_RELDATE	"October 12, 2007"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
@@ -5056,6 +5056,12 @@
 
 	pci_write_config_dword(tp->pdev, TG3PCI_COMMAND, tp->pci_cmd);
 
+	if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
+		pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE,
+				      tp->pci_cacheline_sz);
+		pci_write_config_byte(tp->pdev, PCI_LATENCY_TIMER,
+				      tp->pci_lat_timer);
+	}
 	/* Make sure PCI-X relaxed ordering bit is clear. */
 	if (tp->pcix_cap) {
 		u16 pcix_cmd;
@@ -9034,7 +9040,7 @@
 	int i;
 	u32 j;
 
-	for (i = 0; i < sizeof(test_pattern)/sizeof(u32); i++) {
+	for (i = 0; i < ARRAY_SIZE(test_pattern); i++) {
 		for (j = 0; j < len; j += 4) {
 			u32 val;
 
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 9b9cd83..41f34bb 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -1041,7 +1041,7 @@
     {DC21142, dc21142_infoleaf},
     {DC21143, dc21143_infoleaf}
 };
-#define INFOLEAF_SIZE (sizeof(infoleaf_array)/(sizeof(int)+sizeof(int *)))
+#define INFOLEAF_SIZE ARRAY_SIZE(infoleaf_array)
 
 /*
 ** List the SROM info block functions
@@ -1056,7 +1056,7 @@
     compact_infoblock
 };
 
-#define COMPACT (sizeof(dc_infoblock)/sizeof(int *) - 1)
+#define COMPACT (ARRAY_SIZE(dc_infoblock) - 1)
 
 /*
 ** Miscellaneous defines...
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index d00e7d4..bec413b 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -63,7 +63,7 @@
 #define UGETH_MSG_DEFAULT	(NETIF_MSG_IFUP << 1 ) - 1
 
 void uec_set_ethtool_ops(struct net_device *netdev);
-	
+
 static DEFINE_SPINLOCK(ugeth_lock);
 
 static struct {
@@ -3454,9 +3454,12 @@
 	u16 length, howmany = 0;
 	u32 bd_status;
 	u8 *bdBuffer;
+	struct net_device * dev;
 
 	ugeth_vdbg("%s: IN", __FUNCTION__);
 
+	dev = ugeth->dev;
+
 	/* collect received buffers */
 	bd = ugeth->rxBd[rxQ];
 
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index b39a541..05df0a3 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -1342,11 +1342,11 @@
 	if (flp->initialized)
 		return(-EINVAL);
 
-	for(i=0;i < sizeof(valid_port) / sizeof (int) ; i++)
+	for(i=0; i < ARRAY_SIZE(valid_port); i++)
 		if (valid_port[i] == map->base_addr)
 			break;   
 
-	if (i == sizeof(valid_port) / sizeof(int))
+	if (i == ARRAY_SIZE(valid_port))
 		return(-EINVAL);
 
 	if (!request_region(map->base_addr, SDLA_IO_EXTENTS, dev->name)){
@@ -1487,12 +1487,12 @@
 		}
 	}
 
-	for(i=0;i < sizeof(valid_mem) / sizeof (int) ; i++)
+	for(i=0; i < ARRAY_SIZE(valid_mem); i++)
 		if (valid_mem[i] == map->mem_start)
 			break;   
 
 	err = -EINVAL;
-	if (i == sizeof(valid_mem) / sizeof(int))
+	if (i == ARRAY_SIZE(valid_mem))
 		goto fail2;
 
 	if (flp->type == SDLA_S502A && (map->mem_start & 0xF000) >> 12 == 0x0E)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index f464b82..7fd505c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -74,22 +74,12 @@
 
 	struct napi_struct napi;
 
-	struct xen_netif_tx_front_ring tx;
-	struct xen_netif_rx_front_ring rx;
+	unsigned int evtchn;
+	struct xenbus_device *xbdev;
 
 	spinlock_t   tx_lock;
-	spinlock_t   rx_lock;
-
-	unsigned int evtchn;
-
-	/* Receive-ring batched refills. */
-#define RX_MIN_TARGET 8
-#define RX_DFL_MIN_TARGET 64
-#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
-	unsigned rx_min_target, rx_max_target, rx_target;
-	struct sk_buff_head rx_batch;
-
-	struct timer_list rx_refill_timer;
+	struct xen_netif_tx_front_ring tx;
+	int tx_ring_ref;
 
 	/*
 	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
@@ -108,14 +98,23 @@
 	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 	unsigned tx_skb_freelist;
 
+	spinlock_t   rx_lock ____cacheline_aligned_in_smp;
+	struct xen_netif_rx_front_ring rx;
+	int rx_ring_ref;
+
+	/* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_DFL_MIN_TARGET 64
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+	unsigned rx_min_target, rx_max_target, rx_target;
+	struct sk_buff_head rx_batch;
+
+	struct timer_list rx_refill_timer;
+
 	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 	grant_ref_t gref_rx_head;
 	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 
-	struct xenbus_device *xbdev;
-	int tx_ring_ref;
-	int rx_ring_ref;
-
 	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
 	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
 	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index e8010a7..3ac080e 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -5213,6 +5213,10 @@
 #endif /* CONFIG_PCI */
 
 	TRACE2(("gdth_detect() %d controller detected\n", gdth_ctr_count));
+
+	if (list_empty(&gdth_instances))
+		return -ENODEV;
+
 #ifdef GDTH_STATISTICS
 	TRACE2(("gdth_detect(): Initializing timer !\n"));
 	init_timer(&gdth_timer);
diff --git a/fs/Kconfig b/fs/Kconfig
index bb02b39..815d201 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1755,6 +1755,14 @@
 config SUNRPC_GSS
 	tristate
 
+config SUNRPC_XPRT_RDMA
+	tristate "RDMA transport for sunrpc (EXPERIMENTAL)"
+	depends on SUNRPC && INFINIBAND && EXPERIMENTAL
+	default m
+	help
+	  Adds a client RPC transport for supporting kernel NFS over RDMA
+	  mounts, including Infiniband and iWARP. Experimental.
+
 config SUNRPC_BIND34
 	bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
 	depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/inode.c b/fs/inode.c
index 29f5068..f97de0a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,15 @@
 			return NULL;
 		}
 
+		spin_lock_init(&inode->i_lock);
+		lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+
+		mutex_init(&inode->i_mutex);
+		lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+
+		init_rwsem(&inode->i_alloc_sem);
+		lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+
 		mapping->a_ops = &empty_aops;
  		mapping->host = inode;
 		mapping->flags = 0;
@@ -190,8 +199,6 @@
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
-	mutex_init(&inode->i_mutex);
-	init_rwsem(&inode->i_alloc_sem);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -199,7 +206,6 @@
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
-	spin_lock_init(&inode->i_lock);
 	i_size_ordered_init(inode);
 #ifdef CONFIG_INOTIFY
 	INIT_LIST_HEAD(&inode->inotify_watches);
@@ -561,6 +567,18 @@
 
 void unlock_new_inode(struct inode *inode)
 {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct file_system_type *type = inode->i_sb->s_type;
+	/*
+	 * ensure nobody is actually holding i_mutex
+	 */
+	mutex_destroy(&inode->i_mutex);
+	mutex_init(&inode->i_mutex);
+	if (inode->i_mode & S_IFDIR)
+		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+	else
+		lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
+#endif
 	/*
 	 * This is special!  We do not need the spinlock
 	 * when clearing I_LOCK, because we're guaranteed
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 772b653..8df5bac 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -233,6 +233,8 @@
 	return ret;
 }
 
+static struct lock_class_key jbd_handle_key;
+
 /* Allocate a new handle.  This should probably be in a slab... */
 static handle_t *new_handle(int nblocks)
 {
@@ -243,6 +245,8 @@
 	handle->h_buffer_credits = nblocks;
 	handle->h_ref = 1;
 
+	lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);
+
 	return handle;
 }
 
@@ -286,6 +290,9 @@
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
 	}
+
+	lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+
 	return handle;
 }
 
@@ -1411,6 +1418,8 @@
 		spin_unlock(&journal->j_state_lock);
 	}
 
+	lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+
 	jbd_free_handle(handle);
 	return err;
 }
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3353ed8..908b23f 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -10,6 +10,7 @@
 #include <linux/utsname.h>
 #include <linux/kernel.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprtsock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/sm_inter.h>
@@ -132,7 +133,7 @@
 		.sin_port	= 0,
 	};
 	struct rpc_create_args args = {
-		.protocol	= IPPROTO_UDP,
+		.protocol	= XPRT_TRANSPORT_UDP,
 		.address	= (struct sockaddr *)&sin,
 		.addrsize	= sizeof(sin),
 		.servername	= "localhost",
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5316e30..633653b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -62,8 +62,9 @@
 	}
 	else 
 	{
-		printk(KERN_NOTICE
-			"lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN);
+		dprintk("lockd: bad cookie size %d (only cookies under "
+			"%d bytes are supported.)\n",
+				len, NLM_MAXCOOKIELEN);
 		return NULL;
 	}
 	return p;
@@ -84,8 +85,7 @@
 	unsigned int	len;
 
 	if ((len = ntohl(*p++)) != NFS2_FHSIZE) {
-		printk(KERN_NOTICE
-			"lockd: bad fhandle size %d (should be %d)\n",
+		dprintk("lockd: bad fhandle size %d (should be %d)\n",
 			len, NFS2_FHSIZE);
 		return NULL;
 	}
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 846fc1d..43ff939 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -64,8 +64,9 @@
 	}
 	else 
 	{
-		printk(KERN_NOTICE
-			"lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN);
+		dprintk("lockd: bad cookie size %d (only cookies under "
+			"%d bytes are supported.)\n",
+				len, NLM_MAXCOOKIELEN);
 		return NULL;
 	}
 	return p;
@@ -86,8 +87,7 @@
 	memset(f->data, 0, sizeof(f->data));
 	f->size = ntohl(*p++);
 	if (f->size > NFS_MAXFHSIZE) {
-		printk(KERN_NOTICE
-			"lockd: bad fhandle size %d (should be <=%d)\n",
+		dprintk("lockd: bad fhandle size %d (should be <=%d)\n",
 			f->size, NFS_MAXFHSIZE);
 		return NULL;
 	}
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b55cb23..df0f41e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -16,4 +16,3 @@
 			   nfs4namespace.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
-nfs-objs		:= $(nfs-y)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a204484..a532ee1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -23,6 +23,8 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/metrics.h>
+#include <linux/sunrpc/xprtsock.h>
+#include <linux/sunrpc/xprtrdma.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
@@ -340,7 +342,8 @@
 		to->to_retries = 2;
 
 	switch (proto) {
-	case IPPROTO_TCP:
+	case XPRT_TRANSPORT_TCP:
+	case XPRT_TRANSPORT_RDMA:
 		if (!to->to_initval)
 			to->to_initval = 60 * HZ;
 		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
@@ -349,7 +352,7 @@
 		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
 		to->to_exponential = 0;
 		break;
-	case IPPROTO_UDP:
+	case XPRT_TRANSPORT_UDP:
 	default:
 		if (!to->to_initval)
 			to->to_initval = 11 * HZ / 10;
@@ -501,9 +504,9 @@
 /*
  * Initialise an NFS2 or NFS3 client
  */
-static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+static int nfs_init_client(struct nfs_client *clp,
+			   const struct nfs_parsed_mount_data *data)
 {
-	int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 	int error;
 
 	if (clp->cl_cons_state == NFS_CS_READY) {
@@ -522,8 +525,8 @@
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
-	error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
-					RPC_AUTH_UNIX, 0);
+	error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
+				data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -538,7 +541,8 @@
 /*
  * Create a version 2 or 3 client
  */
-static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+static int nfs_init_server(struct nfs_server *server,
+			   const struct nfs_parsed_mount_data *data)
 {
 	struct nfs_client *clp;
 	int error, nfsvers = 2;
@@ -551,7 +555,8 @@
 #endif
 
 	/* Allocate or find a client reference we can use */
-	clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+	clp = nfs_get_client(data->nfs_server.hostname,
+				&data->nfs_server.address, nfsvers);
 	if (IS_ERR(clp)) {
 		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
 		return PTR_ERR(clp);
@@ -581,7 +586,7 @@
 	if (error < 0)
 		goto error;
 
-	error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+	error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
 	if (error < 0)
 		goto error;
 
@@ -760,7 +765,7 @@
  * Create a version 2 or 3 volume record
  * - keyed on server and FSID
  */
-struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
 				     struct nfs_fh *mntfh)
 {
 	struct nfs_server *server;
@@ -906,7 +911,7 @@
  * Create a version 4 volume record
  */
 static int nfs4_init_server(struct nfs_server *server,
-		const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+		const struct nfs_parsed_mount_data *data)
 {
 	int error;
 
@@ -926,7 +931,7 @@
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
 
-	error = nfs_init_server_rpcclient(server, authflavour);
+	error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
 
 	/* Done */
 	dprintk("<-- nfs4_init_server() = %d\n", error);
@@ -937,12 +942,7 @@
  * Create a version 4 volume record
  * - keyed on server and FSID
  */
-struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
-				      const char *hostname,
-				      const struct sockaddr_in *addr,
-				      const char *mntpath,
-				      const char *ip_addr,
-				      rpc_authflavor_t authflavour,
+struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 				      struct nfs_fh *mntfh)
 {
 	struct nfs_fattr fattr;
@@ -956,13 +956,18 @@
 		return ERR_PTR(-ENOMEM);
 
 	/* Get a client record */
-	error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour,
-			data->proto, data->timeo, data->retrans);
+	error = nfs4_set_client(server,
+			data->nfs_server.hostname,
+			&data->nfs_server.address,
+			data->client_address,
+			data->auth_flavors[0],
+			data->nfs_server.protocol,
+			data->timeo, data->retrans);
 	if (error < 0)
 		goto error;
 
 	/* set up the general RPC client */
-	error = nfs4_init_server(server, data, authflavour);
+	error = nfs4_init_server(server, data);
 	if (error < 0)
 		goto error;
 
@@ -971,7 +976,7 @@
 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
 
 	/* Probe the root fh to retrieve its FSID */
-	error = nfs4_path_walk(server, mntfh, mntpath);
+	error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c55a761..af8b235 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -52,7 +52,7 @@
 	for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
-		if ((struct nfs_open_context *)fl->fl_file->private_data != ctx)
+		if (nfs_file_open_context(fl->fl_file) != ctx)
 			continue;
 		status = nfs4_lock_delegation_recall(state, fl);
 		if (status >= 0)
@@ -109,6 +109,7 @@
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct rpc_cred *oldcred;
 
 	if (delegation == NULL)
 		return;
@@ -116,11 +117,12 @@
 			sizeof(delegation->stateid.data));
 	delegation->type = res->delegation_type;
 	delegation->maxsize = res->maxsize;
-	put_rpccred(cred);
+	oldcred = delegation->cred;
 	delegation->cred = get_rpccred(cred);
 	delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
 	NFS_I(inode)->delegation_state = delegation->type;
 	smp_wmb();
+	put_rpccred(oldcred);
 }
 
 /*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e4a04d1..8ec7fbd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -200,9 +200,6 @@
 	desc->timestamp = timestamp;
 	desc->timestamp_valid = 1;
 	SetPageUptodate(page);
-	spin_lock(&inode->i_lock);
-	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&inode->i_lock);
 	/* Ensure consistent page alignment of the data.
 	 * Note: assumes we have exclusive access to this mapping either
 	 *	 through inode->i_mutex or some other mechanism.
@@ -214,9 +211,7 @@
 	unlock_page(page);
 	return 0;
  error:
-	SetPageError(page);
 	unlock_page(page);
-	nfs_zap_caches(inode);
 	desc->error = error;
 	return -EIO;
 }
@@ -407,7 +402,7 @@
 	struct file	*file = desc->file;
 	struct nfs_entry *entry = desc->entry;
 	struct dentry	*dentry = NULL;
-	unsigned long	fileid;
+	u64		fileid;
 	int		loop_count = 0,
 			res;
 
@@ -418,7 +413,7 @@
 		unsigned d_type = DT_UNKNOWN;
 		/* Note: entry->prev_cookie contains the cookie for
 		 *	 retrieving the current dirent on the server */
-		fileid = nfs_fileid_to_ino_t(entry->ino);
+		fileid = entry->ino;
 
 		/* Get a dentry if we have one */
 		if (dentry != NULL)
@@ -428,11 +423,12 @@
 		/* Use readdirplus info */
 		if (dentry != NULL && dentry->d_inode != NULL) {
 			d_type = dt_type(dentry->d_inode);
-			fileid = dentry->d_inode->i_ino;
+			fileid = NFS_FILEID(dentry->d_inode);
 		}
 
 		res = filldir(dirent, entry->name, entry->len, 
-			      file->f_pos, fileid, d_type);
+			      file->f_pos, nfs_compat_user_ino64(fileid),
+			      d_type);
 		if (res < 0)
 			break;
 		file->f_pos++;
@@ -490,9 +486,6 @@
 						page,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
-	spin_lock(&inode->i_lock);
-	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&inode->i_lock);
 	desc->page = page;
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (desc->error >= 0) {
@@ -558,7 +551,7 @@
 	memset(desc, 0, sizeof(*desc));
 
 	desc->file = filp;
-	desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie;
+	desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 
@@ -623,7 +616,7 @@
 	}
 	if (offset != filp->f_pos) {
 		filp->f_pos = offset;
-		((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
+		nfs_file_open_context(filp)->dir_cookie = 0;
 	}
 out:
 	mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
@@ -650,36 +643,18 @@
  */
 static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 {
-	unsigned long verf;
-
 	if (IS_ROOT(dentry))
 		return 1;
-	verf = dentry->d_time;
-	if (nfs_caches_unstable(dir)
-			|| verf != NFS_I(dir)->cache_change_attribute)
+	if (!nfs_verify_change_attribute(dir, dentry->d_time))
+		return 0;
+	/* Revalidate nfsi->cache_change_attribute before we declare a match */
+	if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+		return 0;
+	if (!nfs_verify_change_attribute(dir, dentry->d_time))
 		return 0;
 	return 1;
 }
 
-static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
-{
-	dentry->d_time = verf;
-}
-
-static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf)
-{
-	nfs_set_verifier(dentry, verf);
-}
-
-/*
- * Whenever an NFS operation succeeds, we know that the dentry
- * is valid, so we update the revalidation timestamp.
- */
-static inline void nfs_renew_times(struct dentry * dentry)
-{
-	dentry->d_time = jiffies;
-}
-
 /*
  * Return the intent data that applies to this particular path component
  *
@@ -695,6 +670,19 @@
 }
 
 /*
+ * Use intent information to check whether or not we're going to do
+ * an O_EXCL create using this path component.
+ */
+static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+{
+	if (NFS_PROTO(dir)->version == 2)
+		return 0;
+	if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
+		return 0;
+	return (nd->intent.open.flags & O_EXCL) != 0;
+}
+
+/*
  * Inode and filehandle revalidation for lookups.
  *
  * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
@@ -717,6 +705,7 @@
 				(S_ISREG(inode->i_mode) ||
 				 S_ISDIR(inode->i_mode)))
 			goto out_force;
+		return 0;
 	}
 	return nfs_revalidate_inode(server, inode);
 out_force:
@@ -759,7 +748,6 @@
 	int error;
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr;
-	unsigned long verifier;
 
 	parent = dget_parent(dentry);
 	lock_kernel();
@@ -767,10 +755,6 @@
 	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
 	inode = dentry->d_inode;
 
-	/* Revalidate parent directory attribute cache */
-	if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
-		goto out_zap_parent;
-
 	if (!inode) {
 		if (nfs_neg_need_reval(dir, dentry, nd))
 			goto out_bad;
@@ -785,7 +769,7 @@
 	}
 
 	/* Force a full look up iff the parent directory has changed */
-	if (nfs_check_verifier(dir, dentry)) {
+	if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
 		if (nfs_lookup_verify_inode(inode, nd))
 			goto out_zap_parent;
 		goto out_valid;
@@ -794,7 +778,6 @@
 	if (NFS_STALE(inode))
 		goto out_bad;
 
-	verifier = nfs_save_change_attribute(dir);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error)
 		goto out_bad;
@@ -803,8 +786,7 @@
 	if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
 		goto out_bad;
 
-	nfs_renew_times(dentry);
-	nfs_refresh_verifier(dentry, verifier);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
  out_valid:
 	unlock_kernel();
 	dput(parent);
@@ -815,7 +797,7 @@
 out_zap_parent:
 	nfs_zap_caches(dir);
  out_bad:
-	NFS_CACHEINV(dir);
+	nfs_mark_for_revalidate(dir);
 	if (inode && S_ISDIR(inode->i_mode)) {
 		/* Purge readdir caches. */
 		nfs_zap_caches(inode);
@@ -872,8 +854,6 @@
 		nfs_complete_unlink(dentry, inode);
 		unlock_kernel();
 	}
-	/* When creating a negative dentry, we want to renew d_time */
-	nfs_renew_times(dentry);
 	iput(inode);
 }
 
@@ -883,30 +863,6 @@
 	.d_iput		= nfs_dentry_iput,
 };
 
-/*
- * Use intent information to check whether or not we're going to do
- * an O_EXCL create using this path component.
- */
-static inline
-int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
-{
-	if (NFS_PROTO(dir)->version == 2)
-		return 0;
-	if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
-		return 0;
-	return (nd->intent.open.flags & O_EXCL) != 0;
-}
-
-static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
-{
-	struct nfs_server *server = NFS_SERVER(dir);
-
-	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
-		/* Revalidate fsid using the parent directory */
-		return __nfs_revalidate_inode(server, dir);
-	return 0;
-}
-
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
 	struct dentry *res;
@@ -945,11 +901,6 @@
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(dir, &fattr);
-	if (error < 0) {
-		res = ERR_PTR(error);
-		goto out_unlock;
-	}
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
@@ -958,17 +909,10 @@
 no_entry:
 	res = d_materialise_unique(dentry, inode);
 	if (res != NULL) {
-		struct dentry *parent;
 		if (IS_ERR(res))
 			goto out_unlock;
-		/* Was a directory renamed! */
-		parent = dget_parent(res);
-		if (!IS_ROOT(parent))
-			nfs_mark_for_revalidate(parent->d_inode);
-		dput(parent);
 		dentry = res;
 	}
-	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unlock:
 	unlock_kernel();
@@ -1020,28 +964,16 @@
 	}
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
-	/* Let vfs_create() deal with O_EXCL */
+	/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
+	 * the dentry. */
 	if (nd->intent.open.flags & O_EXCL) {
-		d_add(dentry, NULL);
+		d_instantiate(dentry, NULL);
 		goto out;
 	}
 
 	/* Open the file on the server */
 	lock_kernel();
-	/* Revalidate parent directory attribute cache */
-	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
-	if (error < 0) {
-		res = ERR_PTR(error);
-		unlock_kernel();
-		goto out;
-	}
-
-	if (nd->intent.open.flags & O_CREAT) {
-		nfs_begin_data_update(dir);
-		res = nfs4_atomic_open(dir, dentry, nd);
-		nfs_end_data_update(dir);
-	} else
-		res = nfs4_atomic_open(dir, dentry, nd);
+	res = nfs4_atomic_open(dir, dentry, nd);
 	unlock_kernel();
 	if (IS_ERR(res)) {
 		error = PTR_ERR(res);
@@ -1063,8 +995,6 @@
 		}
 	} else if (res != NULL)
 		dentry = res;
-	nfs_renew_times(dentry);
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out:
 	return res;
 no_open:
@@ -1076,7 +1006,6 @@
 	struct dentry *parent = NULL;
 	struct inode *inode = dentry->d_inode;
 	struct inode *dir;
-	unsigned long verifier;
 	int openflags, ret = 0;
 
 	parent = dget_parent(dentry);
@@ -1086,8 +1015,12 @@
 	/* We can't create new files in nfs_open_revalidate(), so we
 	 * optimize away revalidation of negative dentries.
 	 */
-	if (inode == NULL)
+	if (inode == NULL) {
+		if (!nfs_neg_need_reval(dir, dentry, nd))
+			ret = 1;
 		goto out;
+	}
+
 	/* NFS only supports OPEN on regular files */
 	if (!S_ISREG(inode->i_mode))
 		goto no_open;
@@ -1104,10 +1037,7 @@
 	 * change attribute *before* we do the RPC call.
 	 */
 	lock_kernel();
-	verifier = nfs_save_change_attribute(dir);
 	ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
-	if (!ret)
-		nfs_refresh_verifier(dentry, verifier);
 	unlock_kernel();
 out:
 	dput(parent);
@@ -1133,6 +1063,7 @@
 		.len = entry->len,
 	};
 	struct inode *inode;
+	unsigned long verf = nfs_save_change_attribute(dir);
 
 	switch (name.len) {
 		case 2:
@@ -1143,6 +1074,14 @@
 			if (name.name[0] == '.')
 				return dget(parent);
 	}
+
+	spin_lock(&dir->i_lock);
+	if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) {
+		spin_unlock(&dir->i_lock);
+		return NULL;
+	}
+	spin_unlock(&dir->i_lock);
+
 	name.hash = full_name_hash(name.name, name.len);
 	dentry = d_lookup(parent, &name);
 	if (dentry != NULL) {
@@ -1183,12 +1122,8 @@
 		dentry = alias;
 	}
 
-	nfs_renew_times(dentry);
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-	return dentry;
 out_renew:
-	nfs_renew_times(dentry);
-	nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir));
+	nfs_set_verifier(dentry, verf);
 	return dentry;
 }
 
@@ -1198,32 +1133,40 @@
 int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 				struct nfs_fattr *fattr)
 {
+	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir = parent->d_inode;
 	struct inode *inode;
 	int error = -EACCES;
 
+	d_drop(dentry);
+
 	/* We may have been initialized further down */
 	if (dentry->d_inode)
-		return 0;
+		goto out;
 	if (fhandle->size == 0) {
-		struct inode *dir = dentry->d_parent->d_inode;
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 		if (error)
-			return error;
+			goto out_error;
 	}
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
 		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
-			return error;
+			goto out_error;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	error = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		return error;
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry))
-		d_rehash(dentry);
+		goto out_error;
+	d_add(dentry, inode);
+out:
+	dput(parent);
 	return 0;
+out_error:
+	nfs_mark_for_revalidate(dir);
+	dput(parent);
+	return error;
 }
 
 /*
@@ -1249,13 +1192,9 @@
 		open_flags = nd->intent.open.flags;
 
 	lock_kernel();
-	nfs_begin_data_update(dir);
 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
-	nfs_end_data_update(dir);
 	if (error != 0)
 		goto out_err;
-	nfs_renew_times(dentry);
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	unlock_kernel();
 	return 0;
 out_err:
@@ -1283,13 +1222,9 @@
 	attr.ia_valid = ATTR_MODE;
 
 	lock_kernel();
-	nfs_begin_data_update(dir);
 	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
-	nfs_end_data_update(dir);
 	if (status != 0)
 		goto out_err;
-	nfs_renew_times(dentry);
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	unlock_kernel();
 	return 0;
 out_err:
@@ -1313,13 +1248,9 @@
 	attr.ia_mode = mode | S_IFDIR;
 
 	lock_kernel();
-	nfs_begin_data_update(dir);
 	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
-	nfs_end_data_update(dir);
 	if (error != 0)
 		goto out_err;
-	nfs_renew_times(dentry);
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	unlock_kernel();
 	return 0;
 out_err:
@@ -1336,12 +1267,10 @@
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	lock_kernel();
-	nfs_begin_data_update(dir);
 	error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
 	/* Ensure the VFS deletes this inode */
 	if (error == 0 && dentry->d_inode != NULL)
 		clear_nlink(dentry->d_inode);
-	nfs_end_data_update(dir);
 	unlock_kernel();
 
 	return error;
@@ -1350,9 +1279,9 @@
 static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 {
 	static unsigned int sillycounter;
-	const int      i_inosize  = sizeof(dir->i_ino)*2;
+	const int      fileidsize  = sizeof(NFS_FILEID(dentry->d_inode))*2;
 	const int      countersize = sizeof(sillycounter)*2;
-	const int      slen       = sizeof(".nfs") + i_inosize + countersize - 1;
+	const int      slen        = sizeof(".nfs")+fileidsize+countersize-1;
 	char           silly[slen+1];
 	struct qstr    qsilly;
 	struct dentry *sdentry;
@@ -1370,8 +1299,9 @@
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 		goto out;
 
-	sprintf(silly, ".nfs%*.*lx",
-		i_inosize, i_inosize, dentry->d_inode->i_ino);
+	sprintf(silly, ".nfs%*.*Lx",
+		fileidsize, fileidsize,
+		(unsigned long long)NFS_FILEID(dentry->d_inode));
 
 	/* Return delegation in anticipation of the rename */
 	nfs_inode_return_delegation(dentry->d_inode);
@@ -1398,19 +1328,14 @@
 
 	qsilly.name = silly;
 	qsilly.len  = strlen(silly);
-	nfs_begin_data_update(dir);
 	if (dentry->d_inode) {
-		nfs_begin_data_update(dentry->d_inode);
 		error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
 				dir, &qsilly);
 		nfs_mark_for_revalidate(dentry->d_inode);
-		nfs_end_data_update(dentry->d_inode);
 	} else
 		error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
 				dir, &qsilly);
-	nfs_end_data_update(dir);
 	if (!error) {
-		nfs_renew_times(dentry);
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 		d_move(dentry, sdentry);
 		error = nfs_async_unlink(dir, dentry);
@@ -1443,19 +1368,15 @@
 		goto out;
 	}
 
-	nfs_begin_data_update(dir);
 	if (inode != NULL) {
 		nfs_inode_return_delegation(inode);
-		nfs_begin_data_update(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 		/* The VFS may want to delete this inode */
 		if (error == 0)
 			drop_nlink(inode);
 		nfs_mark_for_revalidate(inode);
-		nfs_end_data_update(inode);
 	} else
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
-	nfs_end_data_update(dir);
 out:
 	return error;
 }
@@ -1493,7 +1414,6 @@
 	spin_unlock(&dcache_lock);
 	error = nfs_safe_remove(dentry);
 	if (!error) {
-		nfs_renew_times(dentry);
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	} else if (need_rehash)
 		d_rehash(dentry);
@@ -1548,9 +1468,7 @@
 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
 	kunmap_atomic(kaddr, KM_USER0);
 
-	nfs_begin_data_update(dir);
 	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
-	nfs_end_data_update(dir);
 	if (error != 0) {
 		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
 			dir->i_sb->s_id, dir->i_ino,
@@ -1590,15 +1508,12 @@
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
 	lock_kernel();
-	nfs_begin_data_update(dir);
-	nfs_begin_data_update(inode);
+	d_drop(dentry);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
 	if (error == 0) {
 		atomic_inc(&inode->i_count);
-		d_instantiate(dentry, inode);
+		d_add(dentry, inode);
 	}
-	nfs_end_data_update(inode);
-	nfs_end_data_update(dir);
 	unlock_kernel();
 	return error;
 }
@@ -1701,22 +1616,16 @@
 		d_delete(new_dentry);
 	}
 
-	nfs_begin_data_update(old_dir);
-	nfs_begin_data_update(new_dir);
-	nfs_begin_data_update(old_inode);
 	error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
 					   new_dir, &new_dentry->d_name);
 	nfs_mark_for_revalidate(old_inode);
-	nfs_end_data_update(old_inode);
-	nfs_end_data_update(new_dir);
-	nfs_end_data_update(old_dir);
 out:
 	if (rehash)
 		d_rehash(rehash);
 	if (!error) {
 		d_move(old_dentry, new_dentry);
-		nfs_renew_times(new_dentry);
-		nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir));
+		nfs_set_verifier(new_dentry,
+					nfs_save_change_attribute(new_dir));
 	}
 
 	/* new dentry created? */
@@ -1842,7 +1751,7 @@
 	return NULL;
 }
 
-int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_access_entry *cache;
@@ -1854,7 +1763,7 @@
 	cache = nfs_access_search_rbtree(inode, cred);
 	if (cache == NULL)
 		goto out;
-	if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+	if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
 		goto out_stale;
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
@@ -1909,7 +1818,7 @@
 	nfs_access_free_entry(entry);
 }
 
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 {
 	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
 	if (cache == NULL)
@@ -1957,6 +1866,24 @@
 	return -EACCES;
 }
 
+static int nfs_open_permission_mask(int openflags)
+{
+	int mask = 0;
+
+	if (openflags & FMODE_READ)
+		mask |= MAY_READ;
+	if (openflags & FMODE_WRITE)
+		mask |= MAY_WRITE;
+	if (openflags & FMODE_EXEC)
+		mask |= MAY_EXEC;
+	return mask;
+}
+
+int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
+{
+	return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
+}
+
 int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	struct rpc_cred *cred;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fcf4d38..32fe972 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -368,7 +368,7 @@
 		return -ENOMEM;
 
 	dreq->inode = inode;
-	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
@@ -510,7 +510,6 @@
 			nfs_direct_write_reschedule(dreq);
 			break;
 		default:
-			nfs_end_data_update(inode);
 			if (dreq->commit_data != NULL)
 				nfs_commit_free(dreq->commit_data);
 			nfs_direct_free_writedata(dreq);
@@ -533,7 +532,6 @@
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
 {
-	nfs_end_data_update(inode);
 	nfs_direct_free_writedata(dreq);
 	nfs_zap_mapping(inode, inode->i_mapping);
 	nfs_direct_complete(dreq);
@@ -718,14 +716,12 @@
 		sync = FLUSH_STABLE;
 
 	dreq->inode = inode;
-	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
 	nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
 
-	nfs_begin_data_update(inode);
-
 	rpc_clnt_sigmask(clnt, &oldset);
 	result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
 	if (!result)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 579cf8a..c664bb9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -33,6 +33,7 @@
 #include <asm/system.h>
 
 #include "delegation.h"
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
@@ -55,6 +56,8 @@
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
 static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
 
+static struct vm_operations_struct nfs_file_vm_ops;
+
 const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
@@ -174,13 +177,38 @@
 }
 
 /*
+ * Helper for nfs_file_flush() and nfs_fsync()
+ *
+ * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
+ * disk, but it retrieves and clears ctx->error after synching, despite
+ * the two being set at the same time in nfs_context_set_write_error().
+ * This is because the former is used to notify the _next_ call to
+ * nfs_file_write() that a write error occured, and hence cause it to
+ * fall back to doing a synchronous write.
+ */
+static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
+{
+	int have_error, status;
+	int ret = 0;
+
+	have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+	status = nfs_wb_all(inode);
+	have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+	if (have_error)
+		ret = xchg(&ctx->error, 0);
+	if (!ret)
+		ret = status;
+	return ret;
+}
+
+/*
  * Flush all dirty pages, and check for write errors.
  *
  */
 static int
 nfs_file_flush(struct file *file, fl_owner_t id)
 {
-	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct inode	*inode = file->f_path.dentry->d_inode;
 	int		status;
 
@@ -189,16 +217,11 @@
 	if ((file->f_mode & FMODE_WRITE) == 0)
 		return 0;
 	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
-	lock_kernel();
+
 	/* Ensure that data+attribute caches are up to date after close() */
-	status = nfs_wb_all(inode);
-	if (!status) {
-		status = ctx->error;
-		ctx->error = 0;
-		if (!status)
-			nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	}
-	unlock_kernel();
+	status = nfs_do_fsync(ctx, inode);
+	if (!status)
+		nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	return status;
 }
 
@@ -257,8 +280,11 @@
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
 	status = nfs_revalidate_mapping(inode, file->f_mapping);
-	if (!status)
-		status = generic_file_mmap(file, vma);
+	if (!status) {
+		vma->vm_ops = &nfs_file_vm_ops;
+		vma->vm_flags |= VM_CAN_NONLINEAR;
+		file_accessed(file);
+	}
 	return status;
 }
 
@@ -270,21 +296,13 @@
 static int
 nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct inode *inode = dentry->d_inode;
-	int status;
 
 	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
 	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
-	lock_kernel();
-	status = nfs_wb_all(inode);
-	if (!status) {
-		status = ctx->error;
-		ctx->error = 0;
-	}
-	unlock_kernel();
-	return status;
+	return nfs_do_fsync(ctx, inode);
 }
 
 /*
@@ -333,7 +351,7 @@
 const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
-	.set_page_dirty = nfs_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_nobuffers,
 	.writepage = nfs_writepage,
 	.writepages = nfs_writepages,
 	.prepare_write = nfs_prepare_write,
@@ -346,6 +364,43 @@
 	.launder_page = nfs_launder_page,
 };
 
+static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct file *filp = vma->vm_file;
+	unsigned pagelen;
+	int ret = -EINVAL;
+
+	lock_page(page);
+	if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
+		goto out_unlock;
+	pagelen = nfs_page_length(page);
+	if (pagelen == 0)
+		goto out_unlock;
+	ret = nfs_prepare_write(filp, page, 0, pagelen);
+	if (!ret)
+		ret = nfs_commit_write(filp, page, 0, pagelen);
+out_unlock:
+	unlock_page(page);
+	return ret;
+}
+
+static struct vm_operations_struct nfs_file_vm_ops = {
+	.fault = filemap_fault,
+	.page_mkwrite = nfs_vm_page_mkwrite,
+};
+
+static int nfs_need_sync_write(struct file *filp, struct inode *inode)
+{
+	struct nfs_open_context *ctx;
+
+	if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
+		return 1;
+	ctx = nfs_file_open_context(filp);
+	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
+		return 1;
+	return 0;
+}
+
 static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
@@ -382,8 +437,8 @@
 	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
 	/* Return error values for O_SYNC and IS_SYNC() */
-	if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
-		int err = nfs_fsync(iocb->ki_filp, dentry, 1);
+	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
+		int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
 		if (err < 0)
 			result = err;
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 71a49c3..035c769 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -49,6 +49,11 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
+#define NFS_64_BIT_INODE_NUMBERS_ENABLED	1
+
+/* Default is to see 64-bit inode numbers */
+static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
+
 static void nfs_invalidate_inode(struct inode *);
 static int nfs_update_inode(struct inode *, struct nfs_fattr *);
 
@@ -62,6 +67,25 @@
 	return nfs_fileid_to_ino_t(fattr->fileid);
 }
 
+/**
+ * nfs_compat_user_ino64 - returns the user-visible inode number
+ * @fileid: 64-bit fileid
+ *
+ * This function returns a 32-bit inode number if the boot parameter
+ * nfs.enable_ino64 is zero.
+ */
+u64 nfs_compat_user_ino64(u64 fileid)
+{
+	int ino;
+
+	if (enable_ino64)
+		return fileid;
+	ino = fileid;
+	if (sizeof(ino) < sizeof(fileid))
+		ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8;
+	return ino;
+}
+
 int nfs_write_inode(struct inode *inode, int sync)
 {
 	int ret;
@@ -85,7 +109,6 @@
 	 */
 	BUG_ON(nfs_have_writebacks(inode));
 	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
-	BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
 	nfs_zap_acl_cache(inode);
 	nfs_access_zap_cache(inode);
 }
@@ -118,8 +141,8 @@
 
 	nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 
-	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
-	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
+	nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+	nfsi->attrtimeo_timestamp = jiffies;
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
@@ -156,6 +179,13 @@
 	spin_unlock(&inode->i_lock);
 }
 
+void nfs_invalidate_atime(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&inode->i_lock);
+}
+
 /*
  * Invalidate, but do not unhash, the inode.
  * NB: must be called with inode->i_lock held!
@@ -338,7 +368,6 @@
 		return 0;
 
 	lock_kernel();
-	nfs_begin_data_update(inode);
 	/* Write all dirty data */
 	if (S_ISREG(inode->i_mode)) {
 		filemap_write_and_wait(inode->i_mapping);
@@ -352,7 +381,6 @@
 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
 	if (error == 0)
 		nfs_refresh_inode(inode, &fattr);
-	nfs_end_data_update(inode);
 	unlock_kernel();
 	return error;
 }
@@ -431,7 +459,7 @@
 
 	/* Flush out writes to the server in order to update c/mtime */
 	if (S_ISREG(inode->i_mode))
-		nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
+		nfs_wb_nocommit(inode);
 
 	/*
 	 * We may force a getattr if the user cares about atime.
@@ -450,8 +478,10 @@
 		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	else
 		err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	if (!err)
+	if (!err) {
 		generic_fillattr(inode, stat);
+		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+	}
 	return err;
 }
 
@@ -536,7 +566,7 @@
 static void nfs_file_clear_open_context(struct file *filp)
 {
 	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
+	struct nfs_open_context *ctx = nfs_file_open_context(filp);
 
 	if (ctx) {
 		filp->private_data = NULL;
@@ -598,16 +628,10 @@
 	status = nfs_wait_on_inode(inode);
 	if (status < 0)
 		goto out;
-	if (NFS_STALE(inode)) {
-		status = -ESTALE;
-		/* Do we trust the cached ESTALE? */
-		if (NFS_ATTRTIMEO(inode) != 0) {
-			if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) {
-				/* no */
-			} else
-				goto out;
-		}
-	}
+
+	status = -ESTALE;
+	if (NFS_STALE(inode))
+		goto out;
 
 	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
 	if (status != 0) {
@@ -654,7 +678,7 @@
 
 	if (nfs_have_delegation(inode, FMODE_READ))
 		return 0;
-	return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
+	return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
 }
 
 /**
@@ -683,11 +707,8 @@
 	}
 	spin_lock(&inode->i_lock);
 	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
-	if (S_ISDIR(inode->i_mode)) {
+	if (S_ISDIR(inode->i_mode))
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		/* This ensures we revalidate child dentries */
-		nfsi->cache_change_attribute = jiffies;
-	}
 	spin_unlock(&inode->i_lock);
 	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
@@ -756,56 +777,27 @@
 	return ret;
 }
 
-/**
- * nfs_begin_data_update
- * @inode - pointer to inode
- * Declare that a set of operations will update file data on the server
- */
-void nfs_begin_data_update(struct inode *inode)
-{
-	atomic_inc(&NFS_I(inode)->data_updates);
-}
-
-/**
- * nfs_end_data_update
- * @inode - pointer to inode
- * Declare end of the operations that will update file data
- * This will mark the inode as immediately needing revalidation
- * of its attribute cache.
- */
-void nfs_end_data_update(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	/* Directories: invalidate page cache */
-	if (S_ISDIR(inode->i_mode)) {
-		spin_lock(&inode->i_lock);
-		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-		spin_unlock(&inode->i_lock);
-	}
-	nfsi->cache_change_attribute = jiffies;
-	atomic_dec(&nfsi->data_updates);
-}
-
 static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	unsigned long now = jiffies;
 
+	if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 &&
+			nfsi->change_attr == fattr->pre_change_attr) {
+		nfsi->change_attr = fattr->change_attr;
+		if (S_ISDIR(inode->i_mode))
+			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+	}
 	/* If we have atomic WCC data, we may update some attributes */
 	if ((fattr->valid & NFS_ATTR_WCC) != 0) {
-		if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
+		if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
 			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-			nfsi->cache_change_attribute = now;
-		}
 		if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
 			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-			nfsi->cache_change_attribute = now;
+			if (S_ISDIR(inode->i_mode))
+				nfsi->cache_validity |= NFS_INO_INVALID_DATA;
 		}
-		if (inode->i_size == fattr->pre_size && nfsi->npages == 0) {
+		if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
 			inode->i_size = fattr->size;
-			nfsi->cache_change_attribute = now;
-		}
 	}
 }
 
@@ -822,7 +814,7 @@
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_size, new_isize;
-	int data_unstable;
+	unsigned long invalid = 0;
 
 
 	/* Has the inode gone and changed behind our back? */
@@ -831,37 +823,41 @@
 		return -EIO;
 	}
 
-	/* Are we in the process of updating data on the server? */
-	data_unstable = nfs_caches_unstable(inode);
-
 	/* Do atomic weak cache consistency updates */
 	nfs_wcc_update_inode(inode, fattr);
 
 	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
 			nfsi->change_attr != fattr->change_attr)
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
 	/* Verify a few of the more important attributes */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
 	cur_size = i_size_read(inode);
  	new_isize = nfs_size_to_loff_t(fattr->size);
 	if (cur_size != new_isize && nfsi->npages == 0)
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 			|| inode->i_uid != fattr->uid
 			|| inode->i_gid != fattr->gid)
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
 
 	/* Has the link count changed? */
 	if (inode->i_nlink != fattr->nlink)
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+		invalid |= NFS_INO_INVALID_ATTR;
 
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
-		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
+		invalid |= NFS_INO_INVALID_ATIME;
+
+	if (invalid != 0)
+		nfsi->cache_validity |= invalid;
+	else
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
+				| NFS_INO_INVALID_ATIME
+				| NFS_INO_REVAL_PAGECACHE);
 
 	nfsi->read_cache_jiffies = fattr->time_start;
 	return 0;
@@ -911,17 +907,41 @@
 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int status = 0;
 
 	spin_lock(&inode->i_lock);
-	if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
-		nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
-		goto out;
-	}
-	status = nfs_update_inode(inode, fattr);
-out:
+	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+	if (S_ISDIR(inode->i_mode))
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
 	spin_unlock(&inode->i_lock);
-	return status;
+	return nfs_refresh_inode(inode, fattr);
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
+			(fattr->valid & NFS_ATTR_WCC_V4) == 0) {
+		fattr->pre_change_attr = NFS_I(inode)->change_attr;
+		fattr->valid |= NFS_ATTR_WCC_V4;
+	}
+	if ((fattr->valid & NFS_ATTR_FATTR) != 0 &&
+			(fattr->valid & NFS_ATTR_WCC) == 0) {
+		memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
+		memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
+		fattr->pre_size = inode->i_size;
+		fattr->valid |= NFS_ATTR_WCC;
+	}
+	return nfs_post_op_update_inode(inode, fattr);
 }
 
 /*
@@ -941,9 +961,8 @@
 	struct nfs_server *server;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_isize, new_isize;
-	unsigned int	invalid = 0;
+	unsigned long invalid = 0;
 	unsigned long now = jiffies;
-	int data_stable;
 
 	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
 			__FUNCTION__, inode->i_sb->s_id, inode->i_ino,
@@ -968,57 +987,51 @@
 	 * Update the read time so we don't revalidate too often.
 	 */
 	nfsi->read_cache_jiffies = fattr->time_start;
-	nfsi->last_updated = now;
 
-	/* Fix a wraparound issue with nfsi->cache_change_attribute */
-	if (time_before(now, nfsi->cache_change_attribute))
-		nfsi->cache_change_attribute = now - 600*HZ;
-
-	/* Are we racing with known updates of the metadata on the server? */
-	data_stable = nfs_verify_change_attribute(inode, fattr->time_start);
-	if (data_stable)
-		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME);
+	nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
+			| NFS_INO_REVAL_PAGECACHE);
 
 	/* Do atomic weak cache consistency updates */
 	nfs_wcc_update_inode(inode, fattr);
 
+	/* More cache consistency checks */
+	if (!(fattr->valid & NFS_ATTR_FATTR_V4)) {
+		/* NFSv2/v3: Check if the mtime agrees */
+		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
+			dprintk("NFS: mtime change on server for file %s/%ld\n",
+					inode->i_sb->s_id, inode->i_ino);
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+			nfsi->cache_change_attribute = now;
+		}
+		/* If ctime has changed we should definitely clear access+acl caches */
+		if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
+			invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+	} else if (nfsi->change_attr != fattr->change_attr) {
+		dprintk("NFS: change_attr change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		nfsi->cache_change_attribute = now;
+	}
+
 	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
 	cur_isize = i_size_read(inode);
 	if (new_isize != cur_isize) {
-		/* Do we perhaps have any outstanding writes? */
-		if (nfsi->npages == 0) {
-			/* No, but did we race with nfs_end_data_update()? */
-			if (data_stable) {
-				inode->i_size = new_isize;
-				invalid |= NFS_INO_INVALID_DATA;
-			}
-			invalid |= NFS_INO_INVALID_ATTR;
-		} else if (new_isize > cur_isize) {
+		/* Do we perhaps have any outstanding writes, or has
+		 * the file grown beyond our last write? */
+		if (nfsi->npages == 0 || new_isize > cur_isize) {
 			inode->i_size = new_isize;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 		}
-		nfsi->cache_change_attribute = now;
 		dprintk("NFS: isize change on server for file %s/%ld\n",
 				inode->i_sb->s_id, inode->i_ino);
 	}
 
-	/* Check if the mtime agrees */
-	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
-		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-		dprintk("NFS: mtime change on server for file %s/%ld\n",
-				inode->i_sb->s_id, inode->i_ino);
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
-		nfsi->cache_change_attribute = now;
-	}
 
-	/* If ctime has changed we should definitely clear access+acl caches */
-	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
-		invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-		nfsi->cache_change_attribute = now;
-	}
+	memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
 	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+	nfsi->change_attr = fattr->change_attr;
 
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
 	    inode->i_uid != fattr->uid ||
@@ -1039,31 +1052,29 @@
  		inode->i_blocks = fattr->du.nfs2.blocks;
  	}
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
-			nfsi->change_attr != fattr->change_attr) {
-		dprintk("NFS: change_attr change on server for file %s/%ld\n",
-				inode->i_sb->s_id, inode->i_ino);
-		nfsi->change_attr = fattr->change_attr;
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-		nfsi->cache_change_attribute = now;
-	}
-
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = now;
-	} else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
-		if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
-			nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
-		nfsi->attrtimeo_timestamp = now;
+		nfsi->last_updated = now;
+	} else {
+		if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+			if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
+				nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+			nfsi->attrtimeo_timestamp = now;
+		}
+		/*
+		 * Avoid jiffy wraparound issues with nfsi->last_updated
+		 */
+		if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now))
+			nfsi->last_updated = nfsi->read_cache_jiffies;
 	}
+	invalid &= ~NFS_INO_INVALID_ATTR;
 	/* Don't invalidate the data if we were to blame */
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
 				|| S_ISLNK(inode->i_mode)))
 		invalid &= ~NFS_INO_INVALID_DATA;
-	if (data_stable)
-		invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
 	if (!nfs_have_delegation(inode, FMODE_READ) ||
 			(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
 		nfsi->cache_validity |= invalid;
@@ -1152,7 +1163,6 @@
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
-	atomic_set(&nfsi->data_updates, 0);
 	nfsi->ncommit = 0;
 	nfsi->npages = 0;
 	nfs4_init_once(nfsi);
@@ -1249,6 +1259,7 @@
 /* Not quite true; I just maintain it */
 MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
 MODULE_LICENSE("GPL");
+module_param(enable_ino64, bool, 0644);
 
 module_init(init_nfs_fs)
 module_exit(exit_nfs_fs)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 76cf55d..f3acf48 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,8 +5,6 @@
 #include <linux/mount.h>
 
 struct nfs_string;
-struct nfs_mount_data;
-struct nfs4_mount_data;
 
 /* Maximum number of readahead requests
  * FIXME: this should really be a sysctl so that users may tune it to suit
@@ -27,20 +25,50 @@
 	rpc_authflavor_t authflavor;
 };
 
+/*
+ * In-kernel mount arguments
+ */
+struct nfs_parsed_mount_data {
+	int			flags;
+	int			rsize, wsize;
+	int			timeo, retrans;
+	int			acregmin, acregmax,
+				acdirmin, acdirmax;
+	int			namlen;
+	unsigned int		bsize;
+	unsigned int		auth_flavor_len;
+	rpc_authflavor_t	auth_flavors[1];
+	char			*client_address;
+
+	struct {
+		struct sockaddr_in	address;
+		char			*hostname;
+		unsigned int		program;
+		unsigned int		version;
+		unsigned short		port;
+		int			protocol;
+	} mount_server;
+
+	struct {
+		struct sockaddr_in	address;
+		char			*hostname;
+		char			*export_path;
+		unsigned int		program;
+		int			protocol;
+	} nfs_server;
+};
+
 /* client.c */
 extern struct rpc_program nfs_program;
 
 extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
-extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
-					    struct nfs_fh *);
-extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
-					     const char *,
-					     const struct sockaddr_in *,
-					     const char *,
-					     const char *,
-					     rpc_authflavor_t,
-					     struct nfs_fh *);
+extern struct nfs_server *nfs_create_server(
+					const struct nfs_parsed_mount_data *,
+					struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(
+					const struct nfs_parsed_mount_data *,
+					struct nfs_fh *);
 extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
 						      struct nfs_fh *);
 extern void nfs_free_server(struct nfs_server *server);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c5fce75..668ab96 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -251,6 +251,7 @@
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
 	xdr_inline_pages(&req->rq_rcv_buf, replen,
 			 args->pages, args->pgbase, count);
+	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	return 0;
 }
 
@@ -271,7 +272,7 @@
 	res->eof = 0;
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
-		printk(KERN_WARNING "NFS: READ reply header overflowed:"
+		dprintk("NFS: READ reply header overflowed:"
 				"length %d > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
@@ -281,7 +282,7 @@
 
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (count > recvd) {
-		printk(KERN_WARNING "NFS: server cheating in read reply: "
+		dprintk("NFS: server cheating in read reply: "
 			"count %d > recvd %d\n", count, recvd);
 		count = recvd;
 	}
@@ -313,6 +314,7 @@
 
 	/* Copy the page array */
 	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+	sndbuf->flags |= XDRBUF_WRITE;
 	return 0;
 }
 
@@ -431,7 +433,7 @@
 
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
-		printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+		dprintk("NFS: READDIR reply header overflowed:"
 				"length %d > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
@@ -454,7 +456,7 @@
 		len = ntohl(*p++);
 		p += XDR_QUADLEN(len) + 1;	/* name plus cookie */
 		if (len > NFS2_MAXNAMLEN) {
-			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
+			dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
 						len);
 			goto err_unmap;
 		}
@@ -471,7 +473,7 @@
 	entry[0] = entry[1] = 0;
 	/* truncate listing ? */
 	if (!nr) {
-		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		dprintk("NFS: readdir reply truncated!\n");
 		entry[1] = 1;
 	}
 	goto out;
@@ -583,12 +585,12 @@
 	/* Convert length of symlink */
 	len = ntohl(*p++);
 	if (len >= rcvbuf->page_len || len <= 0) {
-		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		dprintk("nfs: server returned giant symlink!\n");
 		return -ENAMETOOLONG;
 	}
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
-		printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+		dprintk("NFS: READLINK reply header overflowed:"
 				"length %d > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
@@ -597,7 +599,7 @@
 	}
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (recvd < len) {
-		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+		dprintk("NFS: server cheating in readlink reply: "
 				"count %u > recvd %u\n", len, recvd);
 		return -EIO;
 	}
@@ -695,7 +697,7 @@
 		if (nfs_errtbl[i].stat == stat)
 			return nfs_errtbl[i].errno;
 	}
-	printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+	dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
 	return nfs_errtbl[i].errno;
 }
 
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7322da4..9b73625 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -317,13 +317,11 @@
 	}
 
 	dprintk("NFS call setacl\n");
-	nfs_begin_data_update(inode);
 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
 	status = rpc_call_sync(server->client_acl, &msg, 0);
 	spin_lock(&inode->i_lock);
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
 	spin_unlock(&inode->i_lock);
-	nfs_end_data_update(inode);
 	dprintk("NFS reply setacl: %d\n", status);
 
 	/* pages may have been allocated at the xdr layer. */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c7ca5d7..4cdc236 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -166,6 +166,7 @@
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
 		msg.rpc_argp = fhandle;
@@ -173,8 +174,6 @@
 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	}
 	dprintk("NFS reply lookup: %d\n", status);
-	if (status >= 0)
-		status = nfs_refresh_inode(dir, &dir_attr);
 	return status;
 }
 
@@ -607,6 +606,9 @@
 
 	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+	nfs_invalidate_atime(dir);
+
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply readdir: %d\n", status);
 	return status;
@@ -724,9 +726,9 @@
 {
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
-	/* Call back common NFS readpage processing */
-	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, &data->fattr);
+
+	nfs_invalidate_atime(data->inode);
+	nfs_refresh_inode(data->inode, &data->fattr);
 	return 0;
 }
 
@@ -747,7 +749,7 @@
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
 	return 0;
 }
 
@@ -775,8 +777,7 @@
 {
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
-	if (task->tk_status >= 0)
-		nfs_post_op_update_inode(data->inode, data->res.fattr);
+	nfs_refresh_inode(data->inode, data->res.fattr);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9e08f0..616d326 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -346,6 +346,7 @@
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
 	xdr_inline_pages(&req->rq_rcv_buf, replen,
 			 args->pages, args->pgbase, count);
+	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	return 0;
 }
 
@@ -367,6 +368,7 @@
 
 	/* Copy the page array */
 	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+	sndbuf->flags |= XDRBUF_WRITE;
 	return 0;
 }
 
@@ -524,7 +526,7 @@
 
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
-		printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+		dprintk("NFS: READDIR reply header overflowed:"
 				"length %d > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
@@ -547,7 +549,7 @@
 		len = ntohl(*p++);		/* string length */
 		p += XDR_QUADLEN(len) + 2;	/* name + cookie */
 		if (len > NFS3_MAXNAMLEN) {
-			printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
+			dprintk("NFS: giant filename in readdir (len %x)!\n",
 						len);
 			goto err_unmap;
 		}
@@ -567,7 +569,7 @@
 					goto short_pkt;
 				len = ntohl(*p++);
 				if (len > NFS3_FHSIZE) {
-					printk(KERN_WARNING "NFS: giant filehandle in "
+					dprintk("NFS: giant filehandle in "
 						"readdir (len %x)!\n", len);
 					goto err_unmap;
 				}
@@ -588,7 +590,7 @@
 	entry[0] = entry[1] = 0;
 	/* truncate listing ? */
 	if (!nr) {
-		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		dprintk("NFS: readdir reply truncated!\n");
 		entry[1] = 1;
 	}
 	goto out;
@@ -826,22 +828,23 @@
 	/* Convert length of symlink */
 	len = ntohl(*p++);
 	if (len >= rcvbuf->page_len || len <= 0) {
-		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		dprintk("nfs: server returned giant symlink!\n");
 		return -ENAMETOOLONG;
 	}
 
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
-		printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+		dprintk("NFS: READLINK reply header overflowed:"
 				"length %d > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
-		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+		dprintk("NFS: READLINK header is short. "
+			"iovec will be shifted.\n");
 		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
 	}
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (recvd < len) {
-		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+		dprintk("NFS: server cheating in readlink reply: "
 				"count %u > recvd %u\n", len, recvd);
 		return -EIO;
 	}
@@ -876,13 +879,13 @@
 	ocount   = ntohl(*p++);
 
 	if (ocount != count) {
-		printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
+		dprintk("NFS: READ count doesn't match RPC opaque count.\n");
 		return -errno_NFSERR_IO;
 	}
 
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
-		printk(KERN_WARNING "NFS: READ reply header overflowed:"
+		dprintk("NFS: READ reply header overflowed:"
 				"length %d > %Zu\n", hdrlen, iov->iov_len);
        		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
@@ -892,7 +895,7 @@
 
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (count > recvd) {
-		printk(KERN_WARNING "NFS: server cheating in read reply: "
+		dprintk("NFS: server cheating in read reply: "
 			"count %d > recvd %d\n", count, recvd);
 		count = recvd;
 		res->eof = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b90e17..cb99fd9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -62,10 +62,8 @@
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
-static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
-static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
 static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 
@@ -177,7 +175,7 @@
 		*p++ = xdr_one;                         /* bitmap length */
 		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
 		*p++ = htonl(8);              /* attribute buffer length */
-		p = xdr_encode_hyper(p, dentry->d_inode->i_ino);
+		p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
 	}
 	
 	*p++ = xdr_one;                                  /* next */
@@ -189,7 +187,7 @@
 	*p++ = xdr_one;                         /* bitmap length */
 	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
 	*p++ = htonl(8);              /* attribute buffer length */
-	p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino);
+	p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
 
 	readdir->pgbase = (char *)p - (char *)start;
 	readdir->count -= readdir->pgbase;
@@ -211,8 +209,9 @@
 
 	spin_lock(&dir->i_lock);
 	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
-	if (cinfo->before == nfsi->change_attr && cinfo->atomic)
-		nfsi->change_attr = cinfo->after;
+	if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
+		nfsi->cache_change_attribute = jiffies;
+	nfsi->change_attr = cinfo->after;
 	spin_unlock(&dir->i_lock);
 }
 
@@ -454,7 +453,7 @@
 		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
 		rcu_read_unlock();
 		lock_kernel();
-		ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
 		unlock_kernel();
 		if (ret != 0)
 			goto out;
@@ -948,36 +947,6 @@
 	return 0;
 }
 
-static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
-{
-	struct nfs_access_entry cache;
-	int mask = 0;
-	int status;
-
-	if (openflags & FMODE_READ)
-		mask |= MAY_READ;
-	if (openflags & FMODE_WRITE)
-		mask |= MAY_WRITE;
-	if (openflags & FMODE_EXEC)
-		mask |= MAY_EXEC;
-	status = nfs_access_get_cached(inode, cred, &cache);
-	if (status == 0)
-		goto out;
-
-	/* Be clever: ask server to check for all possible rights */
-	cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
-	cache.cred = cred;
-	cache.jiffies = jiffies;
-	status = _nfs4_proc_access(inode, &cache);
-	if (status != 0)
-		return status;
-	nfs_access_add_cache(inode, &cache);
-out:
-	if ((cache.mask & mask) == mask)
-		return 0;
-	return -EACCES;
-}
-
 static int nfs4_recover_expired_lease(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
@@ -1381,7 +1350,7 @@
 
 	/* If the open_intent is for execute, we have an extra check to make */
 	if (nd->intent.open.flags & FMODE_EXEC) {
-		ret = _nfs4_do_access(state->inode,
+		ret = nfs_may_open(state->inode,
 				state->owner->so_cred,
 				nd->intent.open.flags);
 		if (ret < 0)
@@ -1390,7 +1359,7 @@
 	filp = lookup_instantiate_filp(nd, path->dentry, NULL);
 	if (!IS_ERR(filp)) {
 		struct nfs_open_context *ctx;
-		ctx = (struct nfs_open_context *)filp->private_data;
+		ctx = nfs_file_open_context(filp);
 		ctx->state = state;
 		return 0;
 	}
@@ -1428,13 +1397,16 @@
 	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
-		if (PTR_ERR(state) == -ENOENT)
+		if (PTR_ERR(state) == -ENOENT) {
 			d_add(dentry, NULL);
+			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+		}
 		return (struct dentry *)state;
 	}
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
 		path.dentry = res;
+	nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
 	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
@@ -1468,6 +1440,7 @@
 		}
 	}
 	if (state->inode == dentry->d_inode) {
+		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 		nfs4_intent_set_file(nd, &path, state);
 		return 1;
 	}
@@ -1757,10 +1730,16 @@
 
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
 	struct nfs4_accessargs args = {
 		.fh = NFS_FH(inode),
+		.bitmask = server->attr_bitmask,
 	};
-	struct nfs4_accessres res = { 0 };
+	struct nfs4_accessres res = {
+		.server = server,
+		.fattr = &fattr,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
 		.rpc_argp = &args,
@@ -1786,6 +1765,7 @@
 		if (mode & MAY_EXEC)
 			args.access |= NFS4_ACCESS_EXECUTE;
 	}
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (!status) {
 		entry->mask = 0;
@@ -1795,6 +1775,7 @@
 			entry->mask |= MAY_WRITE;
 		if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
 			entry->mask |= MAY_EXEC;
+		nfs_refresh_inode(inode, &fattr);
 	}
 	return status;
 }
@@ -1900,11 +1881,13 @@
 	}
 	state = nfs4_do_open(dir, &path, flags, sattr, cred);
 	put_rpccred(cred);
+	d_drop(dentry);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
 		goto out;
 	}
-	d_instantiate(dentry, igrab(state->inode));
+	d_add(dentry, igrab(state->inode));
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	if (flags & O_EXCL) {
 		struct nfs_fattr fattr;
 		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
@@ -2218,6 +2201,9 @@
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+
+	nfs_invalidate_atime(dir);
+
 	dprintk("%s: returns %d\n", __FUNCTION__, status);
 	return status;
 }
@@ -2414,6 +2400,8 @@
 		rpc_restart_call(task);
 		return -EAGAIN;
 	}
+
+	nfs_invalidate_atime(data->inode);
 	if (task->tk_status > 0)
 		renew_lease(server, data->timestamp);
 	return 0;
@@ -2443,7 +2431,7 @@
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), data->timestamp);
-		nfs_post_op_update_inode(inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
 	}
 	return 0;
 }
@@ -2485,8 +2473,7 @@
 		rpc_restart_call(task);
 		return -EAGAIN;
 	}
-	if (task->tk_status >= 0)
-		nfs_post_op_update_inode(inode, data->res.fattr);
+	nfs_refresh_inode(inode, data->res.fattr);
 	return 0;
 }
 
@@ -3056,7 +3043,7 @@
 	if (status == 0) {
 		status = data->rpc_status;
 		if (status == 0)
-			nfs_post_op_update_inode(inode, &data->fattr);
+			nfs_refresh_inode(inode, &data->fattr);
 	}
 	rpc_put_task(task);
 	return status;
@@ -3303,7 +3290,7 @@
 	status = -ENOMEM;
 	if (seqid == NULL)
 		goto out;
-	task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid);
+	task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
 	status = PTR_ERR(task);
 	if (IS_ERR(task))
 		goto out;
@@ -3447,7 +3434,7 @@
 	int ret;
 
 	dprintk("%s: begin!\n", __FUNCTION__);
-	data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data,
+	data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
 			fl->fl_u.nfs4_fl.owner);
 	if (data == NULL)
 		return -ENOMEM;
@@ -3573,7 +3560,7 @@
 	int status;
 
 	/* verify open state */
-	ctx = (struct nfs_open_context *)filp->private_data;
+	ctx = nfs_file_open_context(filp);
 	state = ctx->state;
 
 	if (request->fl_start < 0 || request->fl_end < 0)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3e4adf8..bfb3626 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -774,7 +774,7 @@
 	for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
-		if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
+		if (nfs_file_open_context(fl->fl_file)->state != state)
 			continue;
 		status = ops->recover_lock(state, fl);
 		if (status >= 0)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index badd73b..51dd380 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -376,10 +376,12 @@
 				decode_locku_maxsz)
 #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				encode_access_maxsz)
+				encode_access_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_access_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				decode_access_maxsz)
+				decode_access_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_getattr_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
@@ -562,7 +564,6 @@
 
 #define RESERVE_SPACE(nbytes)	do {				\
 	p = xdr_reserve_space(xdr, nbytes);			\
-	if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
 	BUG_ON(!p);						\
 } while (0)
 
@@ -628,8 +629,8 @@
 	if (iap->ia_valid & ATTR_UID) {
 		owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
 		if (owner_namelen < 0) {
-			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
-			       iap->ia_uid);
+			dprintk("nfs: couldn't resolve uid %d to string\n",
+					iap->ia_uid);
 			/* XXX */
 			strcpy(owner_name, "nobody");
 			owner_namelen = sizeof("nobody") - 1;
@@ -640,8 +641,8 @@
 	if (iap->ia_valid & ATTR_GID) {
 		owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
 		if (owner_grouplen < 0) {
-			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
-			       iap->ia_gid);
+			dprintk("nfs: couldn't resolve gid %d to string\n",
+					iap->ia_gid);
 			strcpy(owner_group, "nobody");
 			owner_grouplen = sizeof("nobody") - 1;
 			/* goto out; */
@@ -711,7 +712,7 @@
 	 * Now we backfill the bitmap and the attribute buffer length.
 	 */
 	if (len != ((char *)p - (char *)q) + 4) {
-		printk ("encode_attr: Attr length calculation error! %u != %Zu\n",
+		printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n",
 				len, ((char *)p - (char *)q) + 4);
 		BUG();
 	}
@@ -1376,14 +1377,20 @@
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
-	if ((status = encode_putfh(&xdr, args->fh)) == 0)
-		status = encode_access(&xdr, args->access);
+	status = encode_putfh(&xdr, args->fh);
+	if (status != 0)
+		goto out;
+	status = encode_access(&xdr, args->access);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
 	return status;
 }
 
@@ -1857,6 +1864,7 @@
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
 	xdr_inline_pages(&req->rq_rcv_buf, replen,
 			 args->pages, args->pgbase, args->count);
+	req->rq_rcv_buf.flags |= XDRBUF_READ;
 out:
 	return status;
 }
@@ -1933,6 +1941,7 @@
 	status = encode_write(&xdr, args);
 	if (status)
 		goto out;
+	req->rq_snd_buf.flags |= XDRBUF_WRITE;
 	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
@@ -2180,9 +2189,9 @@
 #define READ_BUF(nbytes)  do { \
 	p = xdr_inline_decode(xdr, nbytes); \
 	if (unlikely(!p)) { \
-		printk(KERN_INFO "%s: prematurely hit end of receive" \
+		dprintk("nfs: %s: prematurely hit end of receive" \
 				" buffer\n", __FUNCTION__); \
-		printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
+		dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
 				__FUNCTION__, xdr->p, nbytes, xdr->end); \
 		return -EIO; \
 	} \
@@ -2223,9 +2232,8 @@
 	READ_BUF(8);
 	READ32(opnum);
 	if (opnum != expected) {
-		printk(KERN_NOTICE
-				"nfs4_decode_op_hdr: Server returned operation"
-			       	" %d but we issued a request for %d\n",
+		dprintk("nfs: Server returned operation"
+			" %d but we issued a request for %d\n",
 				opnum, expected);
 		return -EIO;
 	}
@@ -2758,7 +2766,7 @@
 				dprintk("%s: nfs_map_name_to_uid failed!\n",
 						__FUNCTION__);
 		} else
-			printk(KERN_WARNING "%s: name too long (%u)!\n",
+			dprintk("%s: name too long (%u)!\n",
 					__FUNCTION__, len);
 		bitmap[1] &= ~FATTR4_WORD1_OWNER;
 	}
@@ -2783,7 +2791,7 @@
 				dprintk("%s: nfs_map_group_to_gid failed!\n",
 						__FUNCTION__);
 		} else
-			printk(KERN_WARNING "%s: name too long (%u)!\n",
+			dprintk("%s: name too long (%u)!\n",
 					__FUNCTION__, len);
 		bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
 	}
@@ -2950,7 +2958,8 @@
 	unsigned int nwords = xdr->p - savep;
 
 	if (unlikely(attrwords != nwords)) {
-		printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n",
+		dprintk("%s: server returned incorrect attribute length: "
+			"%u %c %u\n",
 				__FUNCTION__,
 				attrwords << 2,
 				(attrwords < nwords) ? '<' : '>',
@@ -3451,7 +3460,7 @@
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (count > recvd) {
-		printk(KERN_WARNING "NFS: server cheating in read reply: "
+		dprintk("NFS: server cheating in read reply: "
 				"count %u > recvd %u\n", count, recvd);
 		count = recvd;
 		eof = 0;
@@ -3500,7 +3509,8 @@
 		p += 2;			/* cookie */
 		len = ntohl(*p++);	/* filename length */
 		if (len > NFS4_MAXNAMLEN) {
-			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+			dprintk("NFS: giant filename in readdir (len 0x%x)\n",
+					len);
 			goto err_unmap;
 		}
 		xlen = XDR_QUADLEN(len);
@@ -3528,7 +3538,7 @@
 	entry[0] = entry[1] = 0;
 	/* truncate listing ? */
 	if (!nr) {
-		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		dprintk("NFS: readdir reply truncated!\n");
 		entry[1] = 1;
 	}
 	goto out;
@@ -3554,13 +3564,13 @@
 	READ_BUF(4);
 	READ32(len);
 	if (len >= rcvbuf->page_len || len <= 0) {
-		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		dprintk("nfs: server returned giant symlink!\n");
 		return -ENAMETOOLONG;
 	}
 	hdrlen = (char *) xdr->p - (char *) iov->iov_base;
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (recvd < len) {
-		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+		dprintk("NFS: server cheating in readlink reply: "
 				"count %u > recvd %u\n", len, recvd);
 		return -EIO;
 	}
@@ -3643,7 +3653,7 @@
 		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
 		recvd = req->rq_rcv_buf.len - hdrlen;
 		if (attrlen > recvd) {
-			printk(KERN_WARNING "NFS: server cheating in getattr"
+			dprintk("NFS: server cheating in getattr"
 					" acl reply: attrlen %u > recvd %u\n",
 					attrlen, recvd);
 			return -EINVAL;
@@ -3688,8 +3698,7 @@
 	READ_BUF(8);
 	READ32(opnum);
 	if (opnum != OP_SETCLIENTID) {
-		printk(KERN_NOTICE
-				"nfs4_decode_setclientid: Server returned operation"
+		dprintk("nfs: decode_setclientid: Server returned operation"
 			       	" %d\n", opnum);
 		return -EIO;
 	}
@@ -3783,8 +3792,13 @@
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
 	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
 		goto out;
-	if ((status = decode_putfh(&xdr)) == 0)
-		status = decode_access(&xdr, res);
+	status = decode_putfh(&xdr);
+	if (status != 0)
+		goto out;
+	status = decode_access(&xdr, res);
+	if (status != 0)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 3490322..e87b44e 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -76,6 +76,7 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprtsock.h>
 #include <linux/nfs.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
@@ -491,7 +492,7 @@
 	struct sockaddr_in sin;
 	int status;
 	int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
-					IPPROTO_TCP : IPPROTO_UDP;
+					XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP;
 	int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
 					NFS_MNT3_VERSION : NFS_MNT_VERSION;
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 845cdde..97669ed 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -476,6 +476,8 @@
 	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 
+	nfs_invalidate_atime(dir);
+
 	dprintk("NFS reply readdir: %d\n", status);
 	return status;
 }
@@ -550,6 +552,7 @@
 
 static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
+	nfs_invalidate_atime(data->inode);
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -576,7 +579,7 @@
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
 	return 0;
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 19e0563..4587a86 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -341,9 +341,6 @@
 		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
 		nfs_mark_for_revalidate(data->inode);
 	}
-	spin_lock(&data->inode->i_lock);
-	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&data->inode->i_lock);
 	return 0;
 }
 
@@ -497,8 +494,7 @@
 		if (ctx == NULL)
 			goto out_unlock;
 	} else
-		ctx = get_nfs_open_context((struct nfs_open_context *)
-				file->private_data);
+		ctx = get_nfs_open_context(nfs_file_open_context(file));
 
 	error = nfs_readpage_async(ctx, inode, page);
 
@@ -576,8 +572,7 @@
 		if (desc.ctx == NULL)
 			return -EBADF;
 	} else
-		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
-				filp->private_data);
+		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
 	if (rsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
 	else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b878528..fa517ae 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -33,6 +33,8 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/metrics.h>
+#include <linux/sunrpc/xprtsock.h>
+#include <linux/sunrpc/xprtrdma.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
@@ -58,36 +60,6 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-
-struct nfs_parsed_mount_data {
-	int			flags;
-	int			rsize, wsize;
-	int			timeo, retrans;
-	int			acregmin, acregmax,
-				acdirmin, acdirmax;
-	int			namlen;
-	unsigned int		bsize;
-	unsigned int		auth_flavor_len;
-	rpc_authflavor_t	auth_flavors[1];
-	char			*client_address;
-
-	struct {
-		struct sockaddr_in	address;
-		unsigned int		program;
-		unsigned int		version;
-		unsigned short		port;
-		int			protocol;
-	} mount_server;
-
-	struct {
-		struct sockaddr_in	address;
-		char			*hostname;
-		char			*export_path;
-		unsigned int		program;
-		int			protocol;
-	} nfs_server;
-};
-
 enum {
 	/* Mount options that take no arguments */
 	Opt_soft, Opt_hard,
@@ -97,7 +69,7 @@
 	Opt_ac, Opt_noac,
 	Opt_lock, Opt_nolock,
 	Opt_v2, Opt_v3,
-	Opt_udp, Opt_tcp,
+	Opt_udp, Opt_tcp, Opt_rdma,
 	Opt_acl, Opt_noacl,
 	Opt_rdirplus, Opt_nordirplus,
 	Opt_sharecache, Opt_nosharecache,
@@ -116,7 +88,7 @@
 
 	/* Mount options that take string arguments */
 	Opt_sec, Opt_proto, Opt_mountproto,
-	Opt_addr, Opt_mounthost, Opt_clientaddr,
+	Opt_addr, Opt_mountaddr, Opt_clientaddr,
 
 	/* Mount options that are ignored */
 	Opt_userspace, Opt_deprecated,
@@ -143,6 +115,7 @@
 	{ Opt_v3, "v3" },
 	{ Opt_udp, "udp" },
 	{ Opt_tcp, "tcp" },
+	{ Opt_rdma, "rdma" },
 	{ Opt_acl, "acl" },
 	{ Opt_noacl, "noacl" },
 	{ Opt_rdirplus, "rdirplus" },
@@ -175,13 +148,14 @@
 	{ Opt_mountproto, "mountproto=%s" },
 	{ Opt_addr, "addr=%s" },
 	{ Opt_clientaddr, "clientaddr=%s" },
-	{ Opt_mounthost, "mounthost=%s" },
+	{ Opt_userspace, "mounthost=%s" },
+	{ Opt_mountaddr, "mountaddr=%s" },
 
 	{ Opt_err, NULL }
 };
 
 enum {
-	Opt_xprt_udp, Opt_xprt_tcp,
+	Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma,
 
 	Opt_xprt_err
 };
@@ -189,6 +163,7 @@
 static match_table_t nfs_xprt_protocol_tokens = {
 	{ Opt_xprt_udp, "udp" },
 	{ Opt_xprt_tcp, "tcp" },
+	{ Opt_xprt_rdma, "rdma" },
 
 	{ Opt_xprt_err, NULL }
 };
@@ -449,7 +424,7 @@
 		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
-		{ NFS_MOUNT_INTR, ",intr", "" },
+		{ NFS_MOUNT_INTR, ",intr", ",nointr" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
@@ -460,8 +435,6 @@
 	};
 	const struct proc_nfs_info *nfs_infop;
 	struct nfs_client *clp = nfss->nfs_client;
-	char buf[12];
-	const char *proto;
 
 	seq_printf(m, ",vers=%d", clp->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
@@ -480,18 +453,8 @@
 		else
 			seq_puts(m, nfs_infop->nostr);
 	}
-	switch (nfss->client->cl_xprt->prot) {
-		case IPPROTO_TCP:
-			proto = "tcp";
-			break;
-		case IPPROTO_UDP:
-			proto = "udp";
-			break;
-		default:
-			snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
-			proto = buf;
-	}
-	seq_printf(m, ",proto=%s", proto);
+	seq_printf(m, ",proto=%s",
+		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
 	seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
 	seq_printf(m, ",retrans=%u", clp->retrans_count);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
@@ -506,8 +469,8 @@
 
 	nfs_show_mount_options(m, nfss, 0);
 
-	seq_puts(m, ",addr=");
-	seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
+	seq_printf(m, ",addr="NIPQUAD_FMT,
+		NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
 
 	return 0;
 }
@@ -698,13 +661,19 @@
 			break;
 		case Opt_udp:
 			mnt->flags &= ~NFS_MOUNT_TCP;
-			mnt->nfs_server.protocol = IPPROTO_UDP;
+			mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 			mnt->timeo = 7;
 			mnt->retrans = 5;
 			break;
 		case Opt_tcp:
 			mnt->flags |= NFS_MOUNT_TCP;
-			mnt->nfs_server.protocol = IPPROTO_TCP;
+			mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+			mnt->timeo = 600;
+			mnt->retrans = 2;
+			break;
+		case Opt_rdma:
+			mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
+			mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
 			mnt->timeo = 600;
 			mnt->retrans = 2;
 			break;
@@ -913,13 +882,20 @@
 			switch (token) {
 			case Opt_xprt_udp:
 				mnt->flags &= ~NFS_MOUNT_TCP;
-				mnt->nfs_server.protocol = IPPROTO_UDP;
+				mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 				mnt->timeo = 7;
 				mnt->retrans = 5;
 				break;
 			case Opt_xprt_tcp:
 				mnt->flags |= NFS_MOUNT_TCP;
-				mnt->nfs_server.protocol = IPPROTO_TCP;
+				mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+				mnt->timeo = 600;
+				mnt->retrans = 2;
+				break;
+			case Opt_xprt_rdma:
+				/* vector side protocols to TCP */
+				mnt->flags |= NFS_MOUNT_TCP;
+				mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
 				mnt->timeo = 600;
 				mnt->retrans = 2;
 				break;
@@ -937,11 +913,12 @@
 
 			switch (token) {
 			case Opt_xprt_udp:
-				mnt->mount_server.protocol = IPPROTO_UDP;
+				mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
 				break;
 			case Opt_xprt_tcp:
-				mnt->mount_server.protocol = IPPROTO_TCP;
+				mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
 				break;
+			case Opt_xprt_rdma: /* not used for side protocols */
 			default:
 				goto out_unrec_xprt;
 			}
@@ -961,7 +938,7 @@
 				goto out_nomem;
 			mnt->client_address = string;
 			break;
-		case Opt_mounthost:
+		case Opt_mountaddr:
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
@@ -1027,16 +1004,10 @@
 		sin = args->mount_server.address;
 	else
 		sin = args->nfs_server.address;
-	if (args->mount_server.port == 0) {
-		status = rpcb_getport_sync(&sin,
-					   args->mount_server.program,
-					   args->mount_server.version,
-					   args->mount_server.protocol);
-		if (status < 0)
-			goto out_err;
-		sin.sin_port = htons(status);
-	} else
-		sin.sin_port = htons(args->mount_server.port);
+	/*
+	 * autobind will be used if mount_server.port == 0
+	 */
+	sin.sin_port = htons(args->mount_server.port);
 
 	/*
 	 * Now ask the mount server to map our export path
@@ -1049,14 +1020,11 @@
 			   args->mount_server.version,
 			   args->mount_server.protocol,
 			   root_fh);
-	if (status < 0)
-		goto out_err;
+	if (status == 0)
+		return 0;
 
-	return status;
-
-out_err:
-	dfprintk(MOUNT, "NFS: unable to contact server on host "
-		 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
+	dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
+			", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
 	return status;
 }
 
@@ -1079,15 +1047,31 @@
  * XXX: as far as I can tell, changing the NFS program number is not
  *      supported in the NFS client.
  */
-static int nfs_validate_mount_data(struct nfs_mount_data **options,
+static int nfs_validate_mount_data(void *options,
+				   struct nfs_parsed_mount_data *args,
 				   struct nfs_fh *mntfh,
 				   const char *dev_name)
 {
-	struct nfs_mount_data *data = *options;
+	struct nfs_mount_data *data = (struct nfs_mount_data *)options;
 
 	if (data == NULL)
 		goto out_no_data;
 
+	memset(args, 0, sizeof(*args));
+	args->flags		= (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
+	args->rsize		= NFS_MAX_FILE_IO_SIZE;
+	args->wsize		= NFS_MAX_FILE_IO_SIZE;
+	args->timeo		= 600;
+	args->retrans		= 2;
+	args->acregmin		= 3;
+	args->acregmax		= 60;
+	args->acdirmin		= 30;
+	args->acdirmax		= 60;
+	args->mount_server.protocol = XPRT_TRANSPORT_UDP;
+	args->mount_server.program = NFS_MNT_PROGRAM;
+	args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+	args->nfs_server.program = NFS_PROGRAM;
+
 	switch (data->version) {
 	case 1:
 		data->namlen = 0;
@@ -1116,92 +1100,73 @@
 		if (mntfh->size < sizeof(mntfh->data))
 			memset(mntfh->data + mntfh->size, 0,
 			       sizeof(mntfh->data) - mntfh->size);
+
+		if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
+			goto out_no_address;
+
+		/*
+		 * Translate to nfs_parsed_mount_data, which nfs_fill_super
+		 * can deal with.
+		 */
+		args->flags		= data->flags;
+		args->rsize		= data->rsize;
+		args->wsize		= data->wsize;
+		args->flags		= data->flags;
+		args->timeo		= data->timeo;
+		args->retrans		= data->retrans;
+		args->acregmin		= data->acregmin;
+		args->acregmax		= data->acregmax;
+		args->acdirmin		= data->acdirmin;
+		args->acdirmax		= data->acdirmax;
+		args->nfs_server.address = data->addr;
+		if (!(data->flags & NFS_MOUNT_TCP))
+			args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+		/* N.B. caller will free nfs_server.hostname in all cases */
+		args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
+		args->namlen		= data->namlen;
+		args->bsize		= data->bsize;
+		args->auth_flavors[0]	= data->pseudoflavor;
 		break;
 	default: {
 		unsigned int len;
 		char *c;
 		int status;
-		struct nfs_parsed_mount_data args = {
-			.flags		= (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
-			.rsize		= NFS_MAX_FILE_IO_SIZE,
-			.wsize		= NFS_MAX_FILE_IO_SIZE,
-			.timeo		= 600,
-			.retrans	= 2,
-			.acregmin	= 3,
-			.acregmax	= 60,
-			.acdirmin	= 30,
-			.acdirmax	= 60,
-			.mount_server.protocol = IPPROTO_UDP,
-			.mount_server.program = NFS_MNT_PROGRAM,
-			.nfs_server.protocol = IPPROTO_TCP,
-			.nfs_server.program = NFS_PROGRAM,
-		};
 
-		if (nfs_parse_mount_options((char *) *options, &args) == 0)
+		if (nfs_parse_mount_options((char *)options, args) == 0)
 			return -EINVAL;
 
-		data = kzalloc(sizeof(*data), GFP_KERNEL);
-		if (data == NULL)
-			return -ENOMEM;
-
-		/*
-		 * NB: after this point, caller will free "data"
-		 * if we return an error
-		 */
-		*options = data;
+		if (!nfs_verify_server_address((struct sockaddr *)
+						&args->nfs_server.address))
+			goto out_no_address;
 
 		c = strchr(dev_name, ':');
 		if (c == NULL)
 			return -EINVAL;
 		len = c - dev_name;
-		if (len > sizeof(data->hostname))
-			return -ENAMETOOLONG;
-		strncpy(data->hostname, dev_name, len);
-		args.nfs_server.hostname = data->hostname;
+		/* N.B. caller will free nfs_server.hostname in all cases */
+		args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
 
 		c++;
 		if (strlen(c) > NFS_MAXPATHLEN)
 			return -ENAMETOOLONG;
-		args.nfs_server.export_path = c;
+		args->nfs_server.export_path = c;
 
-		status = nfs_try_mount(&args, mntfh);
+		status = nfs_try_mount(args, mntfh);
 		if (status)
 			return status;
 
-		/*
-		 * Translate to nfs_mount_data, which nfs_fill_super
-		 * can deal with.
-		 */
-		data->version		= 6;
-		data->flags		= args.flags;
-		data->rsize		= args.rsize;
-		data->wsize		= args.wsize;
-		data->timeo		= args.timeo;
-		data->retrans		= args.retrans;
-		data->acregmin		= args.acregmin;
-		data->acregmax		= args.acregmax;
-		data->acdirmin		= args.acdirmin;
-		data->acdirmax		= args.acdirmax;
-		data->addr		= args.nfs_server.address;
-		data->namlen		= args.namlen;
-		data->bsize		= args.bsize;
-		data->pseudoflavor	= args.auth_flavors[0];
-
 		break;
 		}
 	}
 
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
+	if (!(args->flags & NFS_MOUNT_SECFLAVOUR))
+		args->auth_flavors[0] = RPC_AUTH_UNIX;
 
 #ifndef CONFIG_NFS_V3
-	if (data->flags & NFS_MOUNT_VER3)
+	if (args->flags & NFS_MOUNT_VER3)
 		goto out_v3_not_compiled;
 #endif /* !CONFIG_NFS_V3 */
 
-	if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
-		goto out_no_address;
-
 	return 0;
 
 out_no_data:
@@ -1258,7 +1223,8 @@
 /*
  * Finish setting up an NFS2/3 superblock
  */
-static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
+static void nfs_fill_super(struct super_block *sb,
+			   struct nfs_parsed_mount_data *data)
 {
 	struct nfs_server *server = NFS_SB(sb);
 
@@ -1379,7 +1345,7 @@
 	struct nfs_server *server = NULL;
 	struct super_block *s;
 	struct nfs_fh mntfh;
-	struct nfs_mount_data *data = raw_data;
+	struct nfs_parsed_mount_data data;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	struct nfs_sb_mountdata sb_mntdata = {
@@ -1388,12 +1354,12 @@
 	int error;
 
 	/* Validate the mount data */
-	error = nfs_validate_mount_data(&data, &mntfh, dev_name);
+	error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
 	if (error < 0)
 		goto out;
 
 	/* Get a volume representation */
-	server = nfs_create_server(data, &mntfh);
+	server = nfs_create_server(&data, &mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out;
@@ -1417,7 +1383,7 @@
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		nfs_fill_super(s, data);
+		nfs_fill_super(s, &data);
 	}
 
 	mntroot = nfs_get_root(s, &mntfh);
@@ -1432,8 +1398,7 @@
 	error = 0;
 
 out:
-	if (data != raw_data)
-		kfree(data);
+	kfree(data.nfs_server.hostname);
 	return error;
 
 out_err_nosb:
@@ -1559,38 +1524,49 @@
 /*
  * Validate NFSv4 mount options
  */
-static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
-				    const char *dev_name,
-				    struct sockaddr_in *addr,
-				    rpc_authflavor_t *authflavour,
-				    char **hostname,
-				    char **mntpath,
-				    char **ip_addr)
+static int nfs4_validate_mount_data(void *options,
+				    struct nfs_parsed_mount_data *args,
+				    const char *dev_name)
 {
-	struct nfs4_mount_data *data = *options;
+	struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
 	char *c;
 
 	if (data == NULL)
 		goto out_no_data;
 
+	memset(args, 0, sizeof(*args));
+	args->rsize		= NFS_MAX_FILE_IO_SIZE;
+	args->wsize		= NFS_MAX_FILE_IO_SIZE;
+	args->timeo		= 600;
+	args->retrans		= 2;
+	args->acregmin		= 3;
+	args->acregmax		= 60;
+	args->acdirmin		= 30;
+	args->acdirmax		= 60;
+	args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+
 	switch (data->version) {
 	case 1:
-		if (data->host_addrlen != sizeof(*addr))
+		if (data->host_addrlen != sizeof(args->nfs_server.address))
 			goto out_no_address;
-		if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
+		if (copy_from_user(&args->nfs_server.address,
+				   data->host_addr,
+				   sizeof(args->nfs_server.address)))
 			return -EFAULT;
-		if (addr->sin_port == 0)
-			addr->sin_port = htons(NFS_PORT);
-		if (!nfs_verify_server_address((struct sockaddr *) addr))
+		if (args->nfs_server.address.sin_port == 0)
+			args->nfs_server.address.sin_port = htons(NFS_PORT);
+		if (!nfs_verify_server_address((struct sockaddr *)
+						&args->nfs_server.address))
 			goto out_no_address;
 
 		switch (data->auth_flavourlen) {
 		case 0:
-			*authflavour = RPC_AUTH_UNIX;
+			args->auth_flavors[0] = RPC_AUTH_UNIX;
 			break;
 		case 1:
-			if (copy_from_user(authflavour, data->auth_flavours,
-					   sizeof(*authflavour)))
+			if (copy_from_user(&args->auth_flavors[0],
+					   data->auth_flavours,
+					   sizeof(args->auth_flavors[0])))
 				return -EFAULT;
 			break;
 		default:
@@ -1600,75 +1576,57 @@
 		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
 		if (IS_ERR(c))
 			return PTR_ERR(c);
-		*hostname = c;
+		args->nfs_server.hostname = c;
 
 		c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
 		if (IS_ERR(c))
 			return PTR_ERR(c);
-		*mntpath = c;
-		dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
+		args->nfs_server.export_path = c;
+		dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
 
 		c = strndup_user(data->client_addr.data, 16);
 		if (IS_ERR(c))
 			return PTR_ERR(c);
-		*ip_addr = c;
+		args->client_address = c;
+
+		/*
+		 * Translate to nfs_parsed_mount_data, which nfs4_fill_super
+		 * can deal with.
+		 */
+
+		args->flags	= data->flags & NFS4_MOUNT_FLAGMASK;
+		args->rsize	= data->rsize;
+		args->wsize	= data->wsize;
+		args->timeo	= data->timeo;
+		args->retrans	= data->retrans;
+		args->acregmin	= data->acregmin;
+		args->acregmax	= data->acregmax;
+		args->acdirmin	= data->acdirmin;
+		args->acdirmax	= data->acdirmax;
+		args->nfs_server.protocol = data->proto;
 
 		break;
 	default: {
 		unsigned int len;
-		struct nfs_parsed_mount_data args = {
-			.rsize		= NFS_MAX_FILE_IO_SIZE,
-			.wsize		= NFS_MAX_FILE_IO_SIZE,
-			.timeo		= 600,
-			.retrans	= 2,
-			.acregmin	= 3,
-			.acregmax	= 60,
-			.acdirmin	= 30,
-			.acdirmax	= 60,
-			.nfs_server.protocol = IPPROTO_TCP,
-		};
 
-		if (nfs_parse_mount_options((char *) *options, &args) == 0)
+		if (nfs_parse_mount_options((char *)options, args) == 0)
 			return -EINVAL;
 
 		if (!nfs_verify_server_address((struct sockaddr *)
-						&args.nfs_server.address))
+						&args->nfs_server.address))
 			return -EINVAL;
-		*addr = args.nfs_server.address;
 
-		switch (args.auth_flavor_len) {
+		switch (args->auth_flavor_len) {
 		case 0:
-			*authflavour = RPC_AUTH_UNIX;
+			args->auth_flavors[0] = RPC_AUTH_UNIX;
 			break;
 		case 1:
-			*authflavour = (rpc_authflavor_t) args.auth_flavors[0];
 			break;
 		default:
 			goto out_inval_auth;
 		}
 
 		/*
-		 * Translate to nfs4_mount_data, which nfs4_fill_super
-		 * can deal with.
-		 */
-		data = kzalloc(sizeof(*data), GFP_KERNEL);
-		if (data == NULL)
-			return -ENOMEM;
-		*options = data;
-
-		data->version	= 1;
-		data->flags	= args.flags & NFS4_MOUNT_FLAGMASK;
-		data->rsize	= args.rsize;
-		data->wsize	= args.wsize;
-		data->timeo	= args.timeo;
-		data->retrans	= args.retrans;
-		data->acregmin	= args.acregmin;
-		data->acregmax	= args.acregmax;
-		data->acdirmin	= args.acdirmin;
-		data->acdirmax	= args.acdirmax;
-		data->proto	= args.nfs_server.protocol;
-
-		/*
 		 * Split "dev_name" into "hostname:mntpath".
 		 */
 		c = strchr(dev_name, ':');
@@ -1678,27 +1636,25 @@
 		len = c - dev_name;
 		if (len > NFS4_MAXNAMLEN)
 			return -ENAMETOOLONG;
-		*hostname = kzalloc(len, GFP_KERNEL);
-		if (*hostname == NULL)
+		args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
+		if (args->nfs_server.hostname == NULL)
 			return -ENOMEM;
-		strncpy(*hostname, dev_name, len - 1);
+		strncpy(args->nfs_server.hostname, dev_name, len - 1);
 
 		c++;			/* step over the ':' */
 		len = strlen(c);
 		if (len > NFS4_MAXPATHLEN)
 			return -ENAMETOOLONG;
-		*mntpath = kzalloc(len + 1, GFP_KERNEL);
-		if (*mntpath == NULL)
+		args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
+		if (args->nfs_server.export_path == NULL)
 			return -ENOMEM;
-		strncpy(*mntpath, c, len);
+		strncpy(args->nfs_server.export_path, c, len);
 
-		dprintk("MNTPATH: %s\n", *mntpath);
+		dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
 
-		if (args.client_address == NULL)
+		if (args->client_address == NULL)
 			goto out_no_client_address;
 
-		*ip_addr = args.client_address;
-
 		break;
 		}
 	}
@@ -1729,14 +1685,11 @@
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	struct nfs4_mount_data *data = raw_data;
+	struct nfs_parsed_mount_data data;
 	struct super_block *s;
 	struct nfs_server *server;
-	struct sockaddr_in addr;
-	rpc_authflavor_t authflavour;
 	struct nfs_fh mntfh;
 	struct dentry *mntroot;
-	char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	struct nfs_sb_mountdata sb_mntdata = {
 		.mntflags = flags,
@@ -1744,14 +1697,12 @@
 	int error;
 
 	/* Validate the mount data */
-	error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
-					 &hostname, &mntpath, &ip_addr);
+	error = nfs4_validate_mount_data(raw_data, &data, dev_name);
 	if (error < 0)
 		goto out;
 
 	/* Get a volume representation */
-	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
-				    authflavour, &mntfh);
+	server = nfs4_create_server(&data, &mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out;
@@ -1790,9 +1741,9 @@
 	error = 0;
 
 out:
-	kfree(ip_addr);
-	kfree(mntpath);
-	kfree(hostname);
+	kfree(data.client_address);
+	kfree(data.nfs_server.export_path);
+	kfree(data.nfs_server.hostname);
 	return error;
 
 out_free:
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 045ab80..1aed850 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -66,7 +66,6 @@
 		.rpc_cred = data->cred,
 	};
 
-	nfs_begin_data_update(dir);
 	NFS_PROTO(dir)->unlink_setup(&msg, dir);
 	rpc_call_setup(task, &msg, 0);
 }
@@ -84,8 +83,6 @@
 
 	if (!NFS_PROTO(dir)->unlink_done(task, dir))
 		rpc_restart_call(task);
-	else
-		nfs_end_data_update(dir);
 }
 
 /**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0d7a77c..e2bb66c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -110,6 +110,13 @@
 	nfs_writedata_free(wdata);
 }
 
+static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
+{
+	ctx->error = error;
+	smp_wmb();
+	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+}
+
 static struct nfs_page *nfs_page_find_request_locked(struct page *page)
 {
 	struct nfs_page *req = NULL;
@@ -243,10 +250,7 @@
 
 /*
  * Find an associated nfs write request, and prepare to flush it out
- * Returns 1 if there was no write request, or if the request was
- * already tagged by nfs_set_page_dirty.Returns 0 if the request
- * was not tagged.
- * May also return an error if the user signalled nfs_wait_on_request().
+ * May return an error if the user signalled nfs_wait_on_request().
  */
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 				struct page *page)
@@ -261,7 +265,7 @@
 		req = nfs_page_find_request_locked(page);
 		if (req == NULL) {
 			spin_unlock(&inode->i_lock);
-			return 1;
+			return 0;
 		}
 		if (nfs_lock_request_dontget(req))
 			break;
@@ -282,7 +286,7 @@
 		spin_unlock(&inode->i_lock);
 		nfs_unlock_request(req);
 		nfs_pageio_complete(pgio);
-		return 1;
+		return 0;
 	}
 	if (nfs_set_page_writeback(page) != 0) {
 		spin_unlock(&inode->i_lock);
@@ -290,10 +294,20 @@
 	}
 	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
 			NFS_PAGE_TAG_LOCKED);
-	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	spin_unlock(&inode->i_lock);
 	nfs_pageio_add_request(pgio, req);
-	return ret;
+	return 0;
+}
+
+static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
+{
+	struct inode *inode = page->mapping->host;
+
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
+	nfs_pageio_cond_complete(pgio, page->index);
+	return nfs_page_async_flush(pgio, page);
 }
 
 /*
@@ -301,59 +315,35 @@
  */
 static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
-	struct nfs_pageio_descriptor mypgio, *pgio;
-	struct nfs_open_context *ctx;
-	struct inode *inode = page->mapping->host;
-	unsigned offset;
+	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
-
-	if (wbc->for_writepages)
-		pgio = wbc->fs_private;
-	else {
-		nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
-		pgio = &mypgio;
-	}
-
-	nfs_pageio_cond_complete(pgio, page->index);
-
-	err = nfs_page_async_flush(pgio, page);
-	if (err <= 0)
-		goto out;
-	err = 0;
-	offset = nfs_page_length(page);
-	if (!offset)
-		goto out;
-
-	nfs_pageio_cond_complete(pgio, page->index);
-
-	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
-	if (ctx == NULL) {
-		err = -EBADF;
-		goto out;
-	}
-	err = nfs_writepage_setup(ctx, page, 0, offset);
-	put_nfs_open_context(ctx);
-	if (err != 0)
-		goto out;
-	err = nfs_page_async_flush(pgio, page);
-	if (err > 0)
-		err = 0;
-out:
-	if (!wbc->for_writepages)
-		nfs_pageio_complete(pgio);
-	return err;
+	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+	err = nfs_do_writepage(page, wbc, &pgio);
+	nfs_pageio_complete(&pgio);
+	if (err < 0)
+		return err;
+	if (pgio.pg_error < 0)
+		return pgio.pg_error;
+	return 0;
 }
 
 int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	int err;
+	int ret;
 
-	err = nfs_writepage_locked(page, wbc);
+	ret = nfs_writepage_locked(page, wbc);
 	unlock_page(page);
-	return err; 
+	return ret;
+}
+
+static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
+{
+	int ret;
+
+	ret = nfs_do_writepage(page, wbc, data);
+	unlock_page(page);
+	return ret;
 }
 
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
@@ -365,12 +355,11 @@
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
 	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
-	wbc->fs_private = &pgio;
-	err = generic_writepages(mapping, wbc);
+	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
-	if (err)
+	if (err < 0)
 		return err;
-	if (pgio.pg_error)
+	if (pgio.pg_error < 0)
 		return pgio.pg_error;
 	return 0;
 }
@@ -389,14 +378,11 @@
 		return error;
 	if (!nfsi->npages) {
 		igrab(inode);
-		nfs_begin_data_update(inode);
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
 	SetPagePrivate(req->wb_page);
 	set_page_private(req->wb_page, (unsigned long)req);
-	if (PageDirty(req->wb_page))
-		set_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfsi->npages++;
 	kref_get(&req->wb_kref);
 	return 0;
@@ -416,12 +402,9 @@
 	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
-	if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags))
-		__set_page_dirty_nobuffers(req->wb_page);
 	nfsi->npages--;
 	if (!nfsi->npages) {
 		spin_unlock(&inode->i_lock);
-		nfs_end_data_update(inode);
 		iput(inode);
 	} else
 		spin_unlock(&inode->i_lock);
@@ -682,7 +665,7 @@
 
 int nfs_flush_incompatible(struct file *file, struct page *page)
 {
-	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct nfs_page	*req;
 	int do_flush, status;
 	/*
@@ -716,7 +699,7 @@
 int nfs_updatepage(struct file *file, struct page *page,
 		unsigned int offset, unsigned int count)
 {
-	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	struct inode	*inode = page->mapping->host;
 	int		status = 0;
 
@@ -967,7 +950,7 @@
 
 	if (task->tk_status < 0) {
 		nfs_set_pageerror(page);
-		req->wb_context->error = task->tk_status;
+		nfs_context_set_write_error(req->wb_context, task->tk_status);
 		dprintk(", error = %d\n", task->tk_status);
 		goto out;
 	}
@@ -1030,7 +1013,7 @@
 
 		if (task->tk_status < 0) {
 			nfs_set_pageerror(page);
-			req->wb_context->error = task->tk_status;
+			nfs_context_set_write_error(req->wb_context, task->tk_status);
 			dprintk(", error = %d\n", task->tk_status);
 			goto remove_request;
 		}
@@ -1244,7 +1227,7 @@
 			req->wb_bytes,
 			(long long)req_offset(req));
 		if (task->tk_status < 0) {
-			req->wb_context->error = task->tk_status;
+			nfs_context_set_write_error(req->wb_context, task->tk_status);
 			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
@@ -1347,53 +1330,52 @@
 	return ret;
 }
 
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
+static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
-	struct address_space *mapping = inode->i_mapping;
+	int ret;
+
+	ret = nfs_writepages(mapping, wbc);
+	if (ret < 0)
+		goto out;
+	ret = nfs_sync_mapping_wait(mapping, wbc, how);
+	if (ret < 0)
+		goto out;
+	return 0;
+out:
+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return ret;
+}
+
+/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
+static int nfs_write_mapping(struct address_space *mapping, int how)
+{
 	struct writeback_control wbc = {
 		.bdi = mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
+		.sync_mode = WB_SYNC_NONE,
 		.nr_to_write = LONG_MAX,
 		.for_writepages = 1,
 		.range_cyclic = 1,
 	};
 	int ret;
 
-	ret = nfs_writepages(mapping, &wbc);
+	ret = __nfs_write_mapping(mapping, &wbc, how);
 	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
-	if (ret >= 0)
-		return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
+		return ret;
+	wbc.sync_mode = WB_SYNC_ALL;
+	return __nfs_write_mapping(mapping, &wbc, how);
 }
 
-int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
 {
-	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = range_start,
-		.range_end = range_end,
-		.for_writepages = 1,
-	};
-	int ret;
+	return nfs_write_mapping(inode->i_mapping, 0);
+}
 
-	ret = nfs_writepages(mapping, &wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, &wbc, how);
-	if (ret >= 0)
-		return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
+int nfs_wb_nocommit(struct inode *inode)
+{
+	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
@@ -1477,35 +1459,6 @@
 	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
 }
 
-int nfs_set_page_dirty(struct page *page)
-{
-	struct address_space *mapping = page->mapping;
-	struct inode *inode;
-	struct nfs_page *req;
-	int ret;
-
-	if (!mapping)
-		goto out_raced;
-	inode = mapping->host;
-	if (!inode)
-		goto out_raced;
-	spin_lock(&inode->i_lock);
-	req = nfs_page_find_request_locked(page);
-	if (req != NULL) {
-		/* Mark any existing write requests for flushing */
-		ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
-		spin_unlock(&inode->i_lock);
-		nfs_release_request(req);
-		return ret;
-	}
-	ret = __set_page_dirty_nobuffers(page);
-	spin_unlock(&inode->i_lock);
-	return ret;
-out_raced:
-	return !TestSetPageDirty(page);
-}
-
-
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e15f2cf..5733394 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -102,7 +102,8 @@
 out:						\
 	return status;				\
 xdr_error:					\
-	printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);	\
+	dprintk("NFSD: xdr error (%s:%d)\n",	\
+			__FILE__, __LINE__);	\
 	status = nfserr_bad_xdr;		\
 	goto out
 
@@ -124,7 +125,8 @@
 	if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
  		savemem(argp, p, nbytes) :	\
  		(char *)p)) {			\
-		printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+		dprintk("NFSD: xdr error (%s:%d)\n", \
+				__FILE__, __LINE__); \
 		goto xdr_error;			\
 		}				\
 	p += XDR_QUADLEN(nbytes);		\
@@ -140,7 +142,8 @@
 		p = argp->p;			\
 		argp->p += XDR_QUADLEN(nbytes);	\
 	} else if (!(p = read_buf(argp, nbytes))) { \
-		printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+		dprintk("NFSD: xdr error (%s:%d)\n", \
+				__FILE__, __LINE__); \
 		goto xdr_error;			\
 	}					\
 } while (0)
@@ -948,7 +951,8 @@
 	 */
 	avail = (char*)argp->end - (char*)argp->p;
 	if (avail + argp->pagelen < write->wr_buflen) {
-		printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); 
+		dprintk("NFSD: xdr error (%s:%d)\n",
+				__FILE__, __LINE__);
 		goto xdr_error;
 	}
 	argp->rqstp->rq_vec[0].iov_base = p;
@@ -1019,7 +1023,7 @@
 		argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
 		if (!argp->ops) {
 			argp->ops = argp->iops;
-			printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n");
+			dprintk("nfsd: couldn't allocate room for COMPOUND\n");
 			goto xdr_error;
 		}
 	}
@@ -1326,7 +1330,7 @@
 	path = exp->ex_path;
 
 	if (strncmp(path, rootpath, strlen(rootpath))) {
-		printk("nfsd: fs_locations failed;"
+		dprintk("nfsd: fs_locations failed;"
 			"%s is not contained in %s\n", path, rootpath);
 		*stat = nfserr_notsupp;
 		return NULL;
diff --git a/include/asm-blackfin/mach-bf548/bf54x_keys.h b/include/asm-blackfin/mach-bf548/bf54x_keys.h
new file mode 100644
index 0000000..1fb4ec7
--- /dev/null
+++ b/include/asm-blackfin/mach-bf548/bf54x_keys.h
@@ -0,0 +1,17 @@
+#ifndef _BFIN_KPAD_H
+#define _BFIN_KPAD_H
+
+struct bfin_kpad_platform_data {
+	int rows;
+	int cols;
+	const unsigned int *keymap;
+	unsigned short keymapsize;
+	unsigned short repeat;
+	u32 debounce_time;	/* in ns */
+	u32 coldrive_time;	/* in ns */
+	u32 keyup_test_interval; /* in ms */
+};
+
+#define KEYVAL(col, row, val) (((1 << col) << 24) | ((1 << row) << 16) | (val))
+
+#endif
diff --git a/include/asm-mips/mach-au1x00/prom.h b/include/asm-mips/mach-au1x00/prom.h
new file mode 100644
index 0000000..e387155
--- /dev/null
+++ b/include/asm-mips/mach-au1x00/prom.h
@@ -0,0 +1,13 @@
+#ifndef __AU1X00_PROM_H
+#define __AU1X00_PROM_H
+
+extern int prom_argc;
+extern char **prom_argv;
+extern char **prom_envp;
+
+extern void prom_init_cmdline(void);
+extern char *prom_getcmdline(void);
+extern char *prom_getenv(char *envname);
+extern int prom_get_ethernet_addr(char *ethernet_addr);
+
+#endif
diff --git a/include/asm-powerpc/dcr-mmio.h b/include/asm-powerpc/dcr-mmio.h
index 6b82c3b..08532ff 100644
--- a/include/asm-powerpc/dcr-mmio.h
+++ b/include/asm-powerpc/dcr-mmio.h
@@ -33,16 +33,16 @@
 
 extern dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n,
 			  unsigned int dcr_c);
-extern void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c);
+extern void dcr_unmap(dcr_host_t host, unsigned int dcr_c);
 
 static inline u32 dcr_read(dcr_host_t host, unsigned int dcr_n)
 {
-	return in_be32(host.token + dcr_n * host.stride);
+	return in_be32(host.token + ((host.base + dcr_n) * host.stride));
 }
 
 static inline void dcr_write(dcr_host_t host, unsigned int dcr_n, u32 value)
 {
-	out_be32(host.token + dcr_n * host.stride, value);
+	out_be32(host.token + ((host.base + dcr_n) * host.stride), value);
 }
 
 extern u64 of_translate_dcr_address(struct device_node *dev,
diff --git a/include/asm-powerpc/dcr-native.h b/include/asm-powerpc/dcr-native.h
index f41058c..8dbb1ab 100644
--- a/include/asm-powerpc/dcr-native.h
+++ b/include/asm-powerpc/dcr-native.h
@@ -29,9 +29,9 @@
 #define DCR_MAP_OK(host)	(1)
 
 #define dcr_map(dev, dcr_n, dcr_c)	((dcr_host_t){ .base = (dcr_n) })
-#define dcr_unmap(host, dcr_n, dcr_c)	do {} while (0)
-#define dcr_read(host, dcr_n)		mfdcr(dcr_n)
-#define dcr_write(host, dcr_n, value)	mtdcr(dcr_n, value)
+#define dcr_unmap(host, dcr_c)		do {} while (0)
+#define dcr_read(host, dcr_n)		mfdcr(dcr_n + host.base)
+#define dcr_write(host, dcr_n, value)	mtdcr(dcr_n + host.base, value)
 
 /* Device Control Registers */
 void __mtdcr(int reg, unsigned int val);
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
index eff8585..d058b04 100644
--- a/include/asm-x86/irqflags_32.h
+++ b/include/asm-x86/irqflags_32.h
@@ -160,4 +160,17 @@
 # define TRACE_IRQS_OFF
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call lockdep_sys_exit;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+#else
+# define LOCKDEP_SYS_EXIT
+#endif
+
 #endif
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
index 86e70fe..5341ea1 100644
--- a/include/asm-x86/irqflags_64.h
+++ b/include/asm-x86/irqflags_64.h
@@ -137,6 +137,20 @@
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
 # endif
+# ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
+#  define LOCKDEP_SYS_EXIT_IRQ	\
+	TRACE_IRQS_ON; \
+	sti; \
+	SAVE_REST; \
+	LOCKDEP_SYS_EXIT; \
+	RESTORE_REST; \
+	cli; \
+	TRACE_IRQS_OFF;
+# else
+#  define LOCKDEP_SYS_EXIT
+#  define LOCKDEP_SYS_EXIT_IRQ
+# endif
 #endif
 
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16421f6..6d760f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1302,8 +1302,14 @@
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+
 	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
+
+	struct lock_class_key i_lock_key;
+	struct lock_class_key i_mutex_key;
+	struct lock_class_key i_mutex_dir_key;
+	struct lock_class_key i_alloc_sem_key;
 };
 
 extern int get_sb_bdev(struct file_system_type *fs_type,
diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index 265d178..c6d3a9d 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -8,6 +8,7 @@
 	int active_low;
 	char *desc;
 	int type;		/* input event type (EV_KEY, EV_SW) */
+	int wakeup;		/* configure the button as a wake-up source */
 };
 
 struct gpio_keys_platform_data {
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 99e3a1a..58e43e5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -107,7 +107,7 @@
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
 					       struct sk_buff *skb);
-extern int (*br_should_route_hook)(struct sk_buff **pskb);
+extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
 
diff --git a/include/linux/input.h b/include/linux/input.h
index 52d1bd4..f30da6f 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -856,7 +856,7 @@
  *	defining effect parameters
  *
  * This structure is sent through ioctl from the application to the driver.
- * To create a new effect aplication should set its @id to -1; the kernel
+ * To create a new effect application should set its @id to -1; the kernel
  * will return assigned @id which can later be used to update or delete
  * this effect.
  *
@@ -936,9 +936,82 @@
 #define BIT(x)	(1UL<<((x)%BITS_PER_LONG))
 #define LONG(x) ((x)/BITS_PER_LONG)
 
+/**
+ * struct input_dev - represents an input device
+ * @name: name of the device
+ * @phys: physical path to the device in the system hierarchy
+ * @uniq: unique identification code for the device (if device has it)
+ * @id: id of the device (struct input_id)
+ * @evbit: bitmap of types of events supported by the device (EV_KEY,
+ *	EV_REL, etc.)
+ * @keybit: bitmap of keys/buttons this device has
+ * @relbit: bitmap of relative axes for the device
+ * @absbit: bitmap of absolute axes for the device
+ * @mscbit: bitmap of miscellaneous events supported by the device
+ * @ledbit: bitmap of leds present on the device
+ * @sndbit: bitmap of sound effects supported by the device
+ * @ffbit: bitmap of force feedback effects supported by the device
+ * @swbit: bitmap of switches present on the device
+ * @keycodemax: size of keycode table
+ * @keycodesize: size of elements in keycode table
+ * @keycode: map of scancodes to keycodes for this device
+ * @setkeycode: optional method to alter current keymap, used to implement
+ *	sparse keymaps. If not supplied default mechanism will be used
+ * @getkeycode: optional method to retrieve current keymap. If not supplied
+ *	default mechanism will be used
+ * @ff: force feedback structure associated with the device if device
+ *	supports force feedback effects
+ * @repeat_key: stores key code of the last key pressed; used to implement
+ *	software autorepeat
+ * @timer: timer for software autorepeat
+ * @sync: set to 1 when there were no new events since last EV_SYNC
+ * @abs: current values for reports from absolute axes
+ * @rep: current values for autorepeat parameters (delay, rate)
+ * @key: reflects current state of device's keys/buttons
+ * @led: reflects current state of device's LEDs
+ * @snd: reflects current state of sound effects
+ * @sw: reflects current state of device's switches
+ * @absmax: maximum values for events coming from absolute axes
+ * @absmin: minimum values for events coming from absolute axes
+ * @absfuzz: describes noisiness for axes
+ * @absflat: size of the center flat position (used by joydev)
+ * @open: this method is called when the very first user calls
+ *	input_open_device(). The driver must prepare the device
+ *	to start generating events (start polling thread,
+ *	request an IRQ, submit URB, etc.)
+ * @close: this method is called when the very last user calls
+ *	input_close_device().
+ * @flush: purges the device. Most commonly used to get rid of force
+ *	feedback effects loaded into the device when disconnecting
+ *	from it
+ * @event: event handler for events sent _to_ the device, like EV_LED
+ *	or EV_SND. The device is expected to carry out the requested
+ *	action (turn on a LED, play sound, etc.) The call is protected
+ *	by @event_lock and must not sleep
+ * @grab: input handle that currently has the device grabbed (via
+ *	EVIOCGRAB ioctl). When a handle grabs a device it becomes sole
+ *	recipient for all input events coming from the device
+ * @event_lock: this spinlock is is taken when input core receives
+ *	and processes a new event for the device (in input_event()).
+ *	Code that accesses and/or modifies parameters of a device
+ *	(such as keymap or absmin, absmax, absfuzz, etc.) after device
+ *	has been registered with input core must take this lock.
+ * @mutex: serializes calls to open(), close() and flush() methods
+ * @users: stores number of users (input handlers) that opened this
+ *	device. It is used by input_open_device() and input_close_device()
+ *	to make sure that dev->open() is only called when the first
+ *	user opens device and dev->close() is called when the very
+ *	last user closes the device
+ * @going_away: marks devices that are in a middle of unregistering and
+ *	causes input_open_device*() fail with -ENODEV.
+ * @dev: driver model's view of this device
+ * @h_list: list of input handles associated with the device. When
+ *	accessing the list dev->mutex must be held
+ * @node: used to place the device onto input_dev_list
+ */
 struct input_dev {
 
-	void *private;
+	void *private;	/* do not use */
 
 	const char *name;
 	const char *phys;
@@ -966,8 +1039,6 @@
 	unsigned int repeat_key;
 	struct timer_list timer;
 
-	int state;
-
 	int sync;
 
 	int abs[ABS_MAX + 1];
@@ -990,8 +1061,11 @@
 
 	struct input_handle *grab;
 
-	struct mutex mutex;	/* serializes open and close operations */
+	spinlock_t event_lock;
+	struct mutex mutex;
+
 	unsigned int users;
+	int going_away;
 
 	struct device dev;
 	union {			/* temporarily so while we switching to struct device */
@@ -1057,7 +1131,9 @@
 /**
  * struct input_handler - implements one of interfaces for input devices
  * @private: driver-specific data
- * @event: event handler
+ * @event: event handler. This method is being called by input core with
+ *	interrupts disabled and dev->event_lock spinlock held and so
+ *	it may not sleep
  * @connect: called when attaching a handler to an input device
  * @disconnect: disconnects a handler from input device
  * @start: starts handler for given handle. This function is called by
@@ -1069,10 +1145,18 @@
  * @name: name of the handler, to be shown in /proc/bus/input/handlers
  * @id_table: pointer to a table of input_device_ids this driver can
  *	handle
- * @blacklist: prointer to a table of input_device_ids this driver should
+ * @blacklist: pointer to a table of input_device_ids this driver should
  *	ignore even if they match @id_table
  * @h_list: list of input handles associated with the handler
  * @node: for placing the driver onto input_handler_list
+ *
+ * Input handlers attach to input devices and create input handles. There
+ * are likely several handlers attached to any given input device at the
+ * same time. All of them will get their copy of input event generated by
+ * the device.
+ *
+ * Note that input core serializes calls to connect() and disconnect()
+ * methods.
  */
 struct input_handler {
 
@@ -1094,6 +1178,18 @@
 	struct list_head	node;
 };
 
+/**
+ * struct input_handle - links input device with an input handler
+ * @private: handler-specific data
+ * @open: counter showing whether the handle is 'open', i.e. should deliver
+ *	events from its device
+ * @name: name given to the handle by handler that created it
+ * @dev: input device the handle is attached to
+ * @handler: handler that works with the device through this handle
+ * @d_node: used to put the handle on device's list of attached handles
+ * @h_node: used to put the handle on handler's list of handles from which
+ *	it gets events
+ */
 struct input_handle {
 
 	void *private;
@@ -1136,10 +1232,10 @@
 	dev->private = data;
 }
 
-int input_register_device(struct input_dev *);
+int __must_check input_register_device(struct input_dev *);
 void input_unregister_device(struct input_dev *);
 
-int input_register_handler(struct input_handler *);
+int __must_check input_register_handler(struct input_handler *);
 void input_unregister_handler(struct input_handler *);
 
 int input_register_handle(struct input_handle *);
@@ -1216,7 +1312,7 @@
  * @max_effects: maximum number of effects supported by device
  * @effects: pointer to an array of effects currently loaded into device
  * @effect_owners: array of effect owners; when file handle owning
- *	an effect gets closed the effcet is automatically erased
+ *	an effect gets closed the effect is automatically erased
  *
  * Every force-feedback device must implement upload() and playback()
  * methods; erase() is optional. set_gain() and set_autocenter() need
diff --git a/include/linux/isdn.h b/include/linux/isdn.h
index ad09506..d5dda4b 100644
--- a/include/linux/isdn.h
+++ b/include/linux/isdn.h
@@ -286,7 +286,6 @@
 /* Local interface-data */
 typedef struct isdn_net_local_s {
   ulong                  magic;
-  char                   name[10];     /* Name of device                   */
   struct net_device_stats stats;       /* Ethernet Statistics              */
   int                    isdn_device;  /* Index to isdn-device             */
   int                    isdn_channel; /* Index to isdn-channel            */
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 4527375..700a93b 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -30,6 +30,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/mutex.h>
 #include <linux/timer.h>
+#include <linux/lockdep.h>
 
 #include <asm/semaphore.h>
 #endif
@@ -396,6 +397,10 @@
 	unsigned int	h_sync:		1;	/* sync-on-close */
 	unsigned int	h_jdata:	1;	/* force data journaling */
 	unsigned int	h_aborted:	1;	/* fatal error on handle */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	h_lockdep_map;
+#endif
 };
 
 
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index d7a5e03..e757a74 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -109,6 +109,10 @@
 	 ((long)(a) - (long)(b) >= 0))
 #define time_before_eq(a,b)	time_after_eq(b,a)
 
+#define time_in_range(a,b,c) \
+	(time_after_eq(a,b) && \
+	 time_before_eq(a,c))
+
 /* Same as above, but does so with platform independent 64bit types.
  * These must be used when utilizing jiffies_64 (i.e. return value of
  * get_jiffies_64() */
diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index 7ddbc30..33b5c2e 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -416,6 +416,7 @@
 #define K_SHIFTRLOCK	K(KT_LOCK,KG_SHIFTR)
 #define K_CTRLLLOCK	K(KT_LOCK,KG_CTRLL)
 #define K_CTRLRLOCK	K(KT_LOCK,KG_CTRLR)
+#define K_CAPSSHIFTLOCK	K(KT_LOCK,KG_CAPSSHIFT)
 
 #define K_SHIFT_SLOCK	K(KT_SLOCK,KG_SHIFT)
 #define K_CTRL_SLOCK	K(KT_SLOCK,KG_CTRL)
@@ -425,8 +426,9 @@
 #define K_SHIFTR_SLOCK	K(KT_SLOCK,KG_SHIFTR)
 #define K_CTRLL_SLOCK	K(KT_SLOCK,KG_CTRLL)
 #define K_CTRLR_SLOCK	K(KT_SLOCK,KG_CTRLR)
+#define K_CAPSSHIFT_SLOCK	K(KT_SLOCK,KG_CAPSSHIFT)
 
-#define NR_LOCK		8
+#define NR_LOCK		9
 
 #define K_BRL_BLANK     K(KT_BRL, 0)
 #define K_BRL_DOT1      K(KT_BRL, 1)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0e843bf..f6279f6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -238,6 +238,7 @@
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
+extern void lockdep_sys_exit(void);
 
 extern void lockdep_off(void);
 extern void lockdep_on(void);
@@ -252,6 +253,13 @@
 			     struct lock_class_key *key, int subclass);
 
 /*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+/*
  * Reinitialize a lock key - for cases where there is special locking or
  * special initialization of locks so that the validator gets the scope
  * of dependencies wrong: they are either too broad (they need a class-split)
@@ -317,6 +325,7 @@
 # define INIT_LOCKDEP
 # define lockdep_reset()		do { debug_locks = 1; } while (0)
 # define lockdep_free_key_range(start, size)	do { } while (0)
+# define lockdep_sys_exit() 			do { } while (0)
 /*
  * The class key takes no space if lockdep is disabled:
  */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 0d50ea3..6a735c7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -120,14 +120,17 @@
  * See kernel/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/mutex-design.txt.
  */
-extern void fastcall mutex_lock(struct mutex *lock);
-extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
+
+#define mutex_lock(lock) mutex_lock_nested(lock, 0)
+#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #else
+extern void fastcall mutex_lock(struct mutex *lock);
+extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
+
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 #endif
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 1dd075e..16adac6 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -51,7 +51,7 @@
 struct net_device;
 
 typedef unsigned int nf_hookfn(unsigned int hooknum,
-			       struct sk_buff **skb,
+			       struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
 			       int (*okfn)(struct sk_buff *));
@@ -183,7 +183,7 @@
 		   struct nf_loginfo *li,
 		   const char *fmt, ...);
 
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev, struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *), int thresh);
 
@@ -195,7 +195,7 @@
  *	value indicates the packet has been consumed by the hook.
  */
 static inline int nf_hook_thresh(int pf, unsigned int hook,
-				 struct sk_buff **pskb,
+				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
 				 int (*okfn)(struct sk_buff *), int thresh,
@@ -207,14 +207,14 @@
 	if (list_empty(&nf_hooks[pf][hook]))
 		return 1;
 #endif
-	return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
+	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
 }
 
-static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
+static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1);
+	return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1);
 }
                    
 /* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -241,13 +241,13 @@
 
 #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
 #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
@@ -287,7 +287,7 @@
 /* Call this before modifying an existing packet: ensures it is
    modifiable and linear to the point you care about (writable_len).
    Returns true or false. */
-extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
+extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
 
 static inline void nf_csum_replace4(__sum16 *sum, __be32 from, __be32 to)
 {
@@ -317,7 +317,7 @@
 				    unsigned int dataoff, u_int8_t protocol);
 	void		(*saveroute)(const struct sk_buff *skb,
 				     struct nf_info *info);
-	int		(*reroute)(struct sk_buff **skb,
+	int		(*reroute)(struct sk_buff *skb,
 				   const struct nf_info *info);
 	int		route_key_size;
 };
@@ -371,15 +371,15 @@
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
 #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
 static inline int nf_hook_thresh(int pf, unsigned int hook,
-				 struct sk_buff **pskb,
+				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
 				 int (*okfn)(struct sk_buff *), int thresh,
 				 int cond)
 {
-	return okfn(*pskb);
+	return okfn(skb);
 }
-static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
+static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h
index 26c2235..0bb5a69 100644
--- a/include/linux/netfilter/nf_conntrack_amanda.h
+++ b/include/linux/netfilter/nf_conntrack_amanda.h
@@ -2,7 +2,7 @@
 #define _NF_CONNTRACK_AMANDA_H
 /* AMANDA tracking. */
 
-extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
 					  enum ip_conntrack_info ctinfo,
 					  unsigned int matchoff,
 					  unsigned int matchlen,
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index b7c360f..47727d7 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -32,7 +32,7 @@
 
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
-extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
 				       enum ip_conntrack_info ctinfo,
 				       enum nf_ct_ftp_type type,
 				       unsigned int matchoff,
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index 08e2f49..aabd24a 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -36,27 +36,27 @@
 				     struct nf_conntrack_expect *this);
 extern void nf_conntrack_q931_expect(struct nf_conn *new,
 				     struct nf_conntrack_expect *this);
-extern int (*set_h245_addr_hook) (struct sk_buff **pskb,
+extern int (*set_h245_addr_hook) (struct sk_buff *skb,
 				  unsigned char **data, int dataoff,
 				  H245_TransportAddress *taddr,
 				  union nf_conntrack_address *addr,
 				  __be16 port);
-extern int (*set_h225_addr_hook) (struct sk_buff **pskb,
+extern int (*set_h225_addr_hook) (struct sk_buff *skb,
 				  unsigned char **data, int dataoff,
 				  TransportAddress *taddr,
 				  union nf_conntrack_address *addr,
 				  __be16 port);
-extern int (*set_sig_addr_hook) (struct sk_buff **pskb,
+extern int (*set_sig_addr_hook) (struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
 				 unsigned char **data,
 				 TransportAddress *taddr, int count);
-extern int (*set_ras_addr_hook) (struct sk_buff **pskb,
+extern int (*set_ras_addr_hook) (struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
 				 unsigned char **data,
 				 TransportAddress *taddr, int count);
-extern int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
 				 unsigned char **data, int dataoff,
@@ -64,24 +64,24 @@
 				 __be16 port, __be16 rtp_port,
 				 struct nf_conntrack_expect *rtp_exp,
 				 struct nf_conntrack_expect *rtcp_exp);
-extern int (*nat_t120_hook) (struct sk_buff **pskb, struct nf_conn *ct,
+extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo,
 			     unsigned char **data, int dataoff,
 			     H245_TransportAddress *taddr, __be16 port,
 			     struct nf_conntrack_expect *exp);
-extern int (*nat_h245_hook) (struct sk_buff **pskb, struct nf_conn *ct,
+extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo,
 			     unsigned char **data, int dataoff,
 			     TransportAddress *taddr, __be16 port,
 			     struct nf_conntrack_expect *exp);
-extern int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+extern int (*nat_callforwarding_hook) (struct sk_buff *skb,
 				       struct nf_conn *ct,
 				       enum ip_conntrack_info ctinfo,
 				       unsigned char **data, int dataoff,
 				       TransportAddress *taddr,
 				       __be16 port,
 				       struct nf_conntrack_expect *exp);
-extern int (*nat_q931_hook) (struct sk_buff **pskb, struct nf_conn *ct,
+extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo,
 			     unsigned char **data, TransportAddress *taddr,
 			     int idx, __be16 port,
diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h
index 2ab6b82..36282bf 100644
--- a/include/linux/netfilter/nf_conntrack_irc.h
+++ b/include/linux/netfilter/nf_conntrack_irc.h
@@ -5,7 +5,7 @@
 
 #define IRC_PORT	6667
 
-extern unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
 				       enum ip_conntrack_info ctinfo,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index c93061f..2343549 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -301,13 +301,13 @@
 struct nf_conntrack_expect;
 
 extern int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
 			     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			     struct PptpControlHeader *ctlh,
 			     union pptp_ctrl_union *pptpReq);
 
 extern int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
 			    struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			    struct PptpControlHeader *ctlh,
 			    union pptp_ctrl_union *pptpReq);
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index bb7f204..9fff197 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -21,11 +21,11 @@
 	POS_SDP_HEADER,
 };
 
-extern unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
 				       enum ip_conntrack_info ctinfo,
 				       struct nf_conn *ct,
 				       const char **dptr);
-extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
 				       enum ip_conntrack_info ctinfo,
 				       struct nf_conntrack_expect *exp,
 				       const char *dptr);
diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h
index 0d79b7a..c78d38f 100644
--- a/include/linux/netfilter/nf_conntrack_tftp.h
+++ b/include/linux/netfilter/nf_conntrack_tftp.h
@@ -13,7 +13,7 @@
 #define TFTP_OPCODE_ACK		4
 #define TFTP_OPCODE_ERROR	5
 
-extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
 				        enum ip_conntrack_info ctinfo,
 				        struct nf_conntrack_expect *exp);
 
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 64f425a8..03e6ce9 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -191,7 +191,7 @@
 	/* Returns verdict. Argument order changed since 2.6.9, as this
 	   must now handle non-linear skbs, using skb_copy_bits and
 	   skb_ip_make_writable. */
-	unsigned int (*target)(struct sk_buff **pskb,
+	unsigned int (*target)(struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
 			       unsigned int hooknum,
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 584cd1b..2fc73fa 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -287,7 +287,7 @@
 extern int arpt_register_table(struct arpt_table *table,
 			       const struct arpt_replace *repl);
 extern void arpt_unregister_table(struct arpt_table *table);
-extern unsigned int arpt_do_table(struct sk_buff **pskb,
+extern unsigned int arpt_do_table(struct sk_buff *skb,
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 94e0a7d..892f5b7 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -237,7 +237,7 @@
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
 	/* returns one of the standard verdicts */
-	int (*target)(struct sk_buff **pskb, unsigned int hooknr,
+	int (*target)(struct sk_buff *skb, unsigned int hooknr,
 	   const struct net_device *in, const struct net_device *out,
 	   const void *targetdata, unsigned int datalen);
 	/* 0 == let it in */
@@ -294,7 +294,7 @@
 extern void ebt_unregister_watcher(struct ebt_watcher *watcher);
 extern int ebt_register_target(struct ebt_target *target);
 extern void ebt_unregister_target(struct ebt_target *target);
-extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff **pskb,
+extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
    struct ebt_table *table);
 
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index ceae87a..1a63adf 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -75,8 +75,8 @@
 #define SO_ORIGINAL_DST 80
 
 #ifdef __KERNEL__
-extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
-extern int ip_xfrm_me_harder(struct sk_buff **pskb);
+extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
+extern int ip_xfrm_me_harder(struct sk_buff *skb);
 extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 				   unsigned int dataoff, u_int8_t protocol);
 #endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index e992cd6..d79ed69 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -337,7 +337,7 @@
 	.target.errorname = "ERROR",					       \
 }
 
-extern unsigned int ipt_do_table(struct sk_buff **pskb,
+extern unsigned int ipt_do_table(struct sk_buff *skb,
 				 unsigned int hook,
 				 const struct net_device *in,
 				 const struct net_device *out,
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 9a720f0..7dc481c 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -336,7 +336,7 @@
 extern int ip6t_register_table(struct xt_table *table,
 			       const struct ip6t_replace *repl);
 extern void ip6t_unregister_table(struct xt_table *table);
-extern unsigned int ip6t_do_table(struct sk_buff **pskb,
+extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7250eea..c5164c2 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -47,10 +47,8 @@
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_xdr.h>
-
 #include <linux/nfs_fs_sb.h>
 
-#include <linux/rwsem.h>
 #include <linux/mempool.h>
 
 /*
@@ -77,6 +75,9 @@
 	struct nfs4_state *state;
 	fl_owner_t lockowner;
 	int mode;
+
+	unsigned long flags;
+#define NFS_CONTEXT_ERROR_WRITE		(0)
 	int error;
 
 	struct list_head list;
@@ -133,11 +134,6 @@
 	 * server.
 	 */
 	unsigned long		cache_change_attribute;
-	/*
-	 * Counter indicating the number of outstanding requests that
-	 * will cause a file data update.
-	 */
-	atomic_t		data_updates;
 
 	struct rb_root		access_cache;
 	struct list_head	access_cache_entry_lru;
@@ -205,27 +201,18 @@
 #define NFS_CLIENT(inode)		(NFS_SERVER(inode)->client)
 #define NFS_PROTO(inode)		(NFS_SERVER(inode)->nfs_client->rpc_ops)
 #define NFS_COOKIEVERF(inode)		(NFS_I(inode)->cookieverf)
-#define NFS_READTIME(inode)		(NFS_I(inode)->read_cache_jiffies)
-#define NFS_CHANGE_ATTR(inode)		(NFS_I(inode)->change_attr)
-#define NFS_ATTRTIMEO(inode)		(NFS_I(inode)->attrtimeo)
 #define NFS_MINATTRTIMEO(inode) \
 	(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
 			       : NFS_SERVER(inode)->acregmin)
 #define NFS_MAXATTRTIMEO(inode) \
 	(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
 			       : NFS_SERVER(inode)->acregmax)
-#define NFS_ATTRTIMEO_UPDATE(inode)	(NFS_I(inode)->attrtimeo_timestamp)
 
 #define NFS_FLAGS(inode)		(NFS_I(inode)->flags)
 #define NFS_STALE(inode)		(test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
 
 #define NFS_FILEID(inode)		(NFS_I(inode)->fileid)
 
-static inline int nfs_caches_unstable(struct inode *inode)
-{
-	return atomic_read(&NFS_I(inode)->data_updates) != 0;
-}
-
 static inline void nfs_mark_for_revalidate(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -237,12 +224,6 @@
 	spin_unlock(&inode->i_lock);
 }
 
-static inline void NFS_CACHEINV(struct inode *inode)
-{
-	if (!nfs_caches_unstable(inode))
-		nfs_mark_for_revalidate(inode);
-}
-
 static inline int nfs_server_capable(struct inode *inode, int cap)
 {
 	return NFS_SERVER(inode)->caps & cap;
@@ -253,28 +234,33 @@
 	return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
 }
 
-/**
- * nfs_save_change_attribute - Returns the inode attribute change cookie
- * @inode - pointer to inode
- * The "change attribute" is updated every time we finish an operation
- * that will result in a metadata change on the server.
- */
-static inline long nfs_save_change_attribute(struct inode *inode)
+static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
 {
-	return NFS_I(inode)->cache_change_attribute;
+	dentry->d_time = verf;
 }
 
 /**
- * nfs_verify_change_attribute - Detects NFS inode cache updates
- * @inode - pointer to inode
- * @chattr - previously saved change attribute
- * Return "false" if metadata has been updated (or is in the process of
- * being updated) since the change attribute was saved.
+ * nfs_save_change_attribute - Returns the inode attribute change cookie
+ * @dir - pointer to parent directory inode
+ * The "change attribute" is updated every time we finish an operation
+ * that will result in a metadata change on the server.
  */
-static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
+static inline unsigned long nfs_save_change_attribute(struct inode *dir)
 {
-	return !nfs_caches_unstable(inode)
-		&& time_after_eq(chattr, NFS_I(inode)->cache_change_attribute);
+	return NFS_I(dir)->cache_change_attribute;
+}
+
+/**
+ * nfs_verify_change_attribute - Detects NFS remote directory changes
+ * @dir - pointer to parent directory inode
+ * @chattr - previously saved change attribute
+ * Return "false" if the verifiers doesn't match the change attribute.
+ * This would usually indicate that the directory contents have changed on
+ * the server, and that any dentries need revalidating.
+ */
+static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr)
+{
+	return chattr == NFS_I(dir)->cache_change_attribute;
 }
 
 /*
@@ -283,15 +269,14 @@
 extern int nfs_sync_mapping(struct address_space *mapping);
 extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping);
 extern void nfs_zap_caches(struct inode *);
+extern void nfs_invalidate_atime(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
+extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int nfs_permission(struct inode *, int, struct nameidata *);
-extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
-extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
-extern void nfs_access_zap_cache(struct inode *inode);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
@@ -301,13 +286,10 @@
 extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
-extern void nfs_begin_attr_update(struct inode *);
-extern void nfs_end_attr_update(struct inode *);
-extern void nfs_begin_data_update(struct inode *);
-extern void nfs_end_data_update(struct inode *);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
+extern u64 nfs_compat_user_ino64(u64 fileid);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern __be32 root_nfs_parse_addr(char *name); /*__init*/
@@ -328,14 +310,15 @@
 extern const struct file_operations nfs_file_operations;
 extern const struct address_space_operations nfs_file_aops;
 
+static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
+{
+	return filp->private_data;
+}
+
 static inline struct rpc_cred *nfs_file_cred(struct file *file)
 {
-	if (file != NULL) {
-		struct nfs_open_context *ctx;
-
-		ctx = (struct nfs_open_context*)file->private_data;
-		return ctx->cred;
-	}
+	if (file != NULL)
+		return nfs_file_open_context(file)->cred;
 	return NULL;
 }
 
@@ -378,6 +361,8 @@
 extern struct dentry_operations nfs_dentry_operations;
 
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
+extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
+extern void nfs_access_zap_cache(struct inode *inode);
 
 /*
  * linux/fs/nfs/symlink.c
@@ -420,15 +405,14 @@
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
 extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 extern void nfs_writedata_release(void *);
-extern int nfs_set_page_dirty(struct page *);
 
 /*
  * Try to write back everything synchronously (but check the
  * return value!)
  */
 extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
-extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int);
 extern int nfs_wb_all(struct inode *inode);
+extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 78e6079..30dbcc1 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -30,7 +30,6 @@
 #define PG_BUSY			0
 #define PG_NEED_COMMIT		1
 #define PG_NEED_RESCHED		2
-#define PG_NEED_FLUSH		3
 
 struct nfs_inode;
 struct nfs_page {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index cf74a4d..daab252 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -62,7 +62,8 @@
 #define NFS_ATTR_FATTR		0x0002		/* post-op attributes */
 #define NFS_ATTR_FATTR_V3	0x0004		/* NFSv3 attributes */
 #define NFS_ATTR_FATTR_V4	0x0008		/* NFSv4 change attribute */
-#define NFS_ATTR_FATTR_V4_REFERRAL	0x0010		/* NFSv4 referral */
+#define NFS_ATTR_WCC_V4		0x0010		/* pre-op change attribute */
+#define NFS_ATTR_FATTR_V4_REFERRAL	0x0020		/* NFSv4 referral */
 
 /*
  * Info on the file system
@@ -538,10 +539,13 @@
 
 struct nfs4_accessargs {
 	const struct nfs_fh *		fh;
+	const u32 *			bitmask;
 	u32				access;
 };
 
 struct nfs4_accessres {
+	const struct nfs_server *	server;
+	struct nfs_fattr *		fattr;
 	u32				supported;
 	u32				access;
 };
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fe17d7d..76c1a53 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,6 +41,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
+#include <linux/lockdep.h>
 
 /**
  * struct rcu_head - callback structure for use with RCU
@@ -133,6 +134,15 @@
 extern int rcu_pending(int cpu);
 extern int rcu_needs_cpu(int cpu);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire()	lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
+# define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire()	do { } while (0)
+# define rcu_read_release()	do { } while (0)
+#endif
+
 /**
  * rcu_read_lock - mark the beginning of an RCU read-side critical section.
  *
@@ -166,6 +176,7 @@
 	do { \
 		preempt_disable(); \
 		__acquire(RCU); \
+		rcu_read_acquire(); \
 	} while(0)
 
 /**
@@ -175,6 +186,7 @@
  */
 #define rcu_read_unlock() \
 	do { \
+		rcu_read_release(); \
 		__release(RCU); \
 		preempt_enable(); \
 	} while(0)
@@ -204,6 +216,7 @@
 	do { \
 		local_bh_disable(); \
 		__acquire(RCU_BH); \
+		rcu_read_acquire(); \
 	} while(0)
 
 /*
@@ -213,6 +226,7 @@
  */
 #define rcu_read_unlock_bh() \
 	do { \
+		rcu_read_release(); \
 		__release(RCU_BH); \
 		local_bh_enable(); \
 	} while(0)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a656cec..f93f22b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -357,6 +357,7 @@
 }
 
 extern void	       kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
 extern struct sk_buff *skb_copy(const struct sk_buff *skb,
@@ -1781,6 +1782,11 @@
 	return skb_shinfo(skb)->gso_size;
 }
 
+static inline int skb_is_gso_v6(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
+}
+
 static inline void skb_forward_csum(struct sk_buff *skb)
 {
 	/* Unfortunately we don't support this one.  Any brave souls? */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c0d9d14..d9d5c5a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -117,7 +117,7 @@
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
-				struct rpc_program *, int);
+				struct rpc_program *, u32);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 3912cf1..3347c72 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -88,6 +88,11 @@
 	CTL_SLOTTABLE_TCP,
 	CTL_MIN_RESVPORT,
 	CTL_MAX_RESVPORT,
+	CTL_SLOTTABLE_RDMA,
+	CTL_RDMA_MAXINLINEREAD,
+	CTL_RDMA_MAXINLINEWRITE,
+	CTL_RDMA_WRITEPADDING,
+	CTL_RDMA_MEMREG,
 };
 
 #endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 784d4c3..c4beb57 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -138,6 +138,19 @@
 #define RPC_MAX_HEADER_WITH_AUTH \
 	(RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4))
 
+/*
+ * RFC1833/RFC3530 rpcbind (v3+) well-known netid's.
+ */
+#define RPCBIND_NETID_UDP	"udp"
+#define RPCBIND_NETID_TCP	"tcp"
+#define RPCBIND_NETID_UDP6	"udp6"
+#define RPCBIND_NETID_TCP6	"tcp6"
+
+/*
+ * Note that RFC 1833 does not put any size restrictions on the
+ * netid string, but all currently defined netid's fit in 4 bytes.
+ */
+#define RPCBIND_MAXNETIDLEN	(4u)
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_MSGPROT_H_ */
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
new file mode 100644
index 0000000..0013a0d
--- /dev/null
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_RPC_RDMA_H
+#define _LINUX_SUNRPC_RPC_RDMA_H
+
+struct rpcrdma_segment {
+	uint32_t rs_handle;	/* Registered memory handle */
+	uint32_t rs_length;	/* Length of the chunk in bytes */
+	uint64_t rs_offset;	/* Chunk virtual address or offset */
+};
+
+/*
+ * read chunk(s), encoded as a linked list.
+ */
+struct rpcrdma_read_chunk {
+	uint32_t rc_discrim;	/* 1 indicates presence */
+	uint32_t rc_position;	/* Position in XDR stream */
+	struct rpcrdma_segment rc_target;
+};
+
+/*
+ * write chunk, and reply chunk.
+ */
+struct rpcrdma_write_chunk {
+	struct rpcrdma_segment wc_target;
+};
+
+/*
+ * write chunk(s), encoded as a counted array.
+ */
+struct rpcrdma_write_array {
+	uint32_t wc_discrim;	/* 1 indicates presence */
+	uint32_t wc_nchunks;	/* Array count */
+	struct rpcrdma_write_chunk wc_array[0];
+};
+
+struct rpcrdma_msg {
+	uint32_t rm_xid;	/* Mirrors the RPC header xid */
+	uint32_t rm_vers;	/* Version of this protocol */
+	uint32_t rm_credit;	/* Buffers requested/granted */
+	uint32_t rm_type;	/* Type of message (enum rpcrdma_proc) */
+	union {
+
+		struct {			/* no chunks */
+			uint32_t rm_empty[3];	/* 3 empty chunk lists */
+		} rm_nochunks;
+
+		struct {			/* no chunks and padded */
+			uint32_t rm_align;	/* Padding alignment */
+			uint32_t rm_thresh;	/* Padding threshold */
+			uint32_t rm_pempty[3];	/* 3 empty chunk lists */
+		} rm_padded;
+
+		uint32_t rm_chunks[0];	/* read, write and reply chunks */
+
+	} rm_body;
+};
+
+#define RPCRDMA_HDRLEN_MIN	28
+
+enum rpcrdma_errcode {
+	ERR_VERS = 1,
+	ERR_CHUNK = 2
+};
+
+struct rpcrdma_err_vers {
+	uint32_t rdma_vers_low;	/* Version range supported by peer */
+	uint32_t rdma_vers_high;
+};
+
+enum rpcrdma_proc {
+	RDMA_MSG = 0,		/* An RPC call or reply msg */
+	RDMA_NOMSG = 1,		/* An RPC call or reply msg - separate body */
+	RDMA_MSGP = 2,		/* An RPC call or reply msg with padding */
+	RDMA_DONE = 3,		/* Client signals reply completion */
+	RDMA_ERROR = 4		/* An RPC RDMA encoding error */
+};
+
+#endif				/* _LINUX_SUNRPC_RPC_RDMA_H */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index c6b53d1..0751c94 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -70,7 +70,10 @@
 
 	struct page **	pages;		/* Array of contiguous pages */
 	unsigned int	page_base,	/* Start of page data */
-			page_len;	/* Length of page data */
+			page_len,	/* Length of page data */
+			flags;		/* Flags for data disposition */
+#define XDRBUF_READ		0x01		/* target of file read */
+#define XDRBUF_WRITE		0x02		/* source of file write */
 
 	unsigned int	buflen,		/* Total length of storage buffer */
 			len;		/* Length of XDR encoded message */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index d11cedd..30b17b3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -19,25 +19,11 @@
 
 #ifdef __KERNEL__
 
-extern unsigned int xprt_udp_slot_table_entries;
-extern unsigned int xprt_tcp_slot_table_entries;
-
 #define RPC_MIN_SLOT_TABLE	(2U)
 #define RPC_DEF_SLOT_TABLE	(16U)
 #define RPC_MAX_SLOT_TABLE	(128U)
 
 /*
- * Parameters for choosing a free port
- */
-extern unsigned int xprt_min_resvport;
-extern unsigned int xprt_max_resvport;
-
-#define RPC_MIN_RESVPORT	(1U)
-#define RPC_MAX_RESVPORT	(65535U)
-#define RPC_DEF_MIN_RESVPORT	(665U)
-#define RPC_DEF_MAX_RESVPORT	(1023U)
-
-/*
  * This describes a timeout strategy
  */
 struct rpc_timeout {
@@ -53,6 +39,10 @@
 	RPC_DISPLAY_PORT,
 	RPC_DISPLAY_PROTO,
 	RPC_DISPLAY_ALL,
+	RPC_DISPLAY_HEX_ADDR,
+	RPC_DISPLAY_HEX_PORT,
+	RPC_DISPLAY_UNIVERSAL_ADDR,
+	RPC_DISPLAY_NETID,
 	RPC_DISPLAY_MAX,
 };
 
@@ -196,14 +186,22 @@
 	char *			address_strings[RPC_DISPLAY_MAX];
 };
 
-struct rpc_xprtsock_create {
-	int			proto;		/* IPPROTO_UDP or IPPROTO_TCP */
+struct xprt_create {
+	int			ident;		/* XPRT_TRANSPORT identifier */
 	struct sockaddr *	srcaddr;	/* optional local address */
 	struct sockaddr *	dstaddr;	/* remote peer address */
 	size_t			addrlen;
 	struct rpc_timeout *	timeout;	/* optional timeout parameters */
 };
 
+struct xprt_class {
+	struct list_head	list;
+	int			ident;		/* XPRT_TRANSPORT identifier */
+	struct rpc_xprt *	(*setup)(struct xprt_create *);
+	struct module		*owner;
+	char			name[32];
+};
+
 /*
  * Transport operations used by ULPs
  */
@@ -212,7 +210,7 @@
 /*
  * Generic internal transport functions
  */
-struct rpc_xprt *	xprt_create_transport(struct rpc_xprtsock_create *args);
+struct rpc_xprt		*xprt_create_transport(struct xprt_create *args);
 void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_task *task);
@@ -235,6 +233,8 @@
 /*
  * Transport switch helper functions
  */
+int			xprt_register_transport(struct xprt_class *type);
+int			xprt_unregister_transport(struct xprt_class *type);
 void			xprt_set_retrans_timeout_def(struct rpc_task *task);
 void			xprt_set_retrans_timeout_rtt(struct rpc_task *task);
 void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
@@ -248,14 +248,6 @@
 void			xprt_disconnect(struct rpc_xprt *xprt);
 
 /*
- * Socket transport setup operations
- */
-struct rpc_xprt *	xs_setup_udp(struct rpc_xprtsock_create *args);
-struct rpc_xprt *	xs_setup_tcp(struct rpc_xprtsock_create *args);
-int			init_socket_xprt(void);
-void			cleanup_socket_xprt(void);
-
-/*
  * Reserved bit positions in xprt->state
  */
 #define XPRT_LOCKED		(0)
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
new file mode 100644
index 0000000..4de56b1
--- /dev/null
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRTRDMA_H
+#define _LINUX_SUNRPC_XPRTRDMA_H
+
+/*
+ * RPC transport identifier for RDMA
+ */
+#define XPRT_TRANSPORT_RDMA	256
+
+/*
+ * rpcbind (v3+) RDMA netid.
+ */
+#define RPCBIND_NETID_RDMA	"rdma"
+
+/*
+ * Constants. Max RPC/NFS header is big enough to account for
+ * additional marshaling buffers passed down by Linux client.
+ *
+ * RDMA header is currently fixed max size, and is big enough for a
+ * fully-chunked NFS message (read chunks are the largest). Note only
+ * a single chunk type per message is supported currently.
+ */
+#define RPCRDMA_MIN_SLOT_TABLE	(2U)
+#define RPCRDMA_DEF_SLOT_TABLE	(32U)
+#define RPCRDMA_MAX_SLOT_TABLE	(256U)
+
+#define RPCRDMA_DEF_INLINE  (1024)	/* default inline max */
+
+#define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
+
+#define RDMA_RESOLVE_TIMEOUT	(5*HZ)	/* TBD 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */
+
+/* memory registration strategies */
+#define RPCRDMA_PERSISTENT_REGISTRATION (1)
+
+enum rpcrdma_memreg {
+	RPCRDMA_BOUNCEBUFFERS = 0,
+	RPCRDMA_REGISTER,
+	RPCRDMA_MEMWINDOWS,
+	RPCRDMA_MEMWINDOWS_ASYNC,
+	RPCRDMA_MTHCAFMR,
+	RPCRDMA_ALLPHYSICAL,
+	RPCRDMA_LAST
+};
+
+#endif /* _LINUX_SUNRPC_XPRTRDMA_H */
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
new file mode 100644
index 0000000..2c6c2c2
--- /dev/null
+++ b/include/linux/sunrpc/xprtsock.h
@@ -0,0 +1,51 @@
+/*
+ *  linux/include/linux/sunrpc/xprtsock.h
+ *
+ *  Declarations for the RPC transport socket provider.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRTSOCK_H
+#define _LINUX_SUNRPC_XPRTSOCK_H
+
+#ifdef __KERNEL__
+
+/*
+ * Socket transport setup operations
+ */
+struct rpc_xprt *xs_setup_udp(struct xprt_create *args);
+struct rpc_xprt *xs_setup_tcp(struct xprt_create *args);
+
+int		init_socket_xprt(void);
+void		cleanup_socket_xprt(void);
+
+/*
+ * RPC transport identifiers for UDP, TCP
+ *
+ * To preserve compatibility with the historical use of raw IP protocol
+ * id's for transport selection, these are specified with the previous
+ * values. No such restriction exists for new transports, except that
+ * they may not collide with these values (17 and 6, respectively).
+ */
+#define XPRT_TRANSPORT_UDP	IPPROTO_UDP
+#define XPRT_TRANSPORT_TCP	IPPROTO_TCP
+
+/*
+ * RPC slot table sizes for UDP, TCP transports
+ */
+extern unsigned int xprt_udp_slot_table_entries;
+extern unsigned int xprt_tcp_slot_table_entries;
+
+/*
+ * Parameters for choosing a free port
+ */
+extern unsigned int xprt_min_resvport;
+extern unsigned int xprt_max_resvport;
+
+#define RPC_MIN_RESVPORT	(1U)
+#define RPC_MAX_RESVPORT	(65535U)
+#define RPC_DEF_MIN_RESVPORT	(665U)
+#define RPC_DEF_MAX_RESVPORT	(1023U)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SUNRPC_XPRTSOCK_H */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index c5b94c1..bac17c5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -315,7 +315,7 @@
  */
  	u32	snd_ssthresh;	/* Slow start size threshold		*/
  	u32	snd_cwnd;	/* Sending congestion window		*/
- 	u16	snd_cwnd_cnt;	/* Linear increase counter		*/
+	u32	snd_cwnd_cnt;	/* Linear increase counter		*/
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c7c3337..d1321a8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -62,8 +62,6 @@
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned for_writepages:1;	/* This is a writepages() call */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
-
-	void *fs_private;		/* For use by ->writepages() */
 };
 
 /*
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
new file mode 100644
index 0000000..911c2cd
--- /dev/null
+++ b/include/net/inet_frag.h
@@ -0,0 +1,60 @@
+#ifndef __NET_FRAG_H__
+#define __NET_FRAG_H__
+
+struct inet_frag_queue {
+	struct hlist_node	list;
+	struct list_head	lru_list;   /* lru list member */
+	spinlock_t		lock;
+	atomic_t		refcnt;
+	struct timer_list	timer;      /* when will this queue expire? */
+	struct sk_buff		*fragments; /* list of received fragments */
+	ktime_t			stamp;
+	int			len;        /* total length of orig datagram */
+	int			meat;
+	__u8			last_in;    /* first/last segment arrived? */
+
+#define COMPLETE		4
+#define FIRST_IN		2
+#define LAST_IN			1
+};
+
+#define INETFRAGS_HASHSZ		64
+
+struct inet_frags_ctl {
+	int high_thresh;
+	int low_thresh;
+	int timeout;
+	int secret_interval;
+};
+
+struct inet_frags {
+	struct list_head	lru_list;
+	struct hlist_head	hash[INETFRAGS_HASHSZ];
+	rwlock_t		lock;
+	u32			rnd;
+	int			nqueues;
+	int			qsize;
+	atomic_t		mem;
+	struct timer_list	secret_timer;
+	struct inet_frags_ctl	*ctl;
+
+	unsigned int		(*hashfn)(struct inet_frag_queue *);
+	void			(*destructor)(struct inet_frag_queue *);
+	void			(*skb_free)(struct sk_buff *);
+};
+
+void inet_frags_init(struct inet_frags *);
+void inet_frags_fini(struct inet_frags *);
+
+void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+void inet_frag_destroy(struct inet_frag_queue *q,
+				struct inet_frags *f, int *work);
+int inet_frag_evictor(struct inet_frags *f);
+
+static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+{
+	if (atomic_dec_and_test(&q->refcnt))
+		inet_frag_destroy(q, f, NULL);
+}
+
+#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index 3af3ed9..840dd91 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -160,6 +160,7 @@
 #define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
 #define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
 #define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ip_statistics, field)
+#define IP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
 #define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
@@ -177,10 +178,8 @@
 extern int sysctl_ip_nonlocal_bind;
 
 /* From ip_fragment.c */
-extern int sysctl_ipfrag_high_thresh; 
-extern int sysctl_ipfrag_low_thresh;
-extern int sysctl_ipfrag_time;
-extern int sysctl_ipfrag_secret_interval;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl ip4_frags_ctl;
 extern int sysctl_ipfrag_max_dist;
 
 /* From inetpeer.c */
@@ -332,9 +331,9 @@
 	IP_DEFRAG_VS_FWD
 };
 
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
-extern int ip_frag_nqueues;
-extern atomic_t ip_frag_mem;
+int ip_defrag(struct sk_buff *skb, u32 user);
+int ip_frag_mem(void);
+int ip_frag_nqueues(void);
 
 /*
  *	Functions provided by ip_forward.c
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 672564e..4187056 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -464,10 +464,10 @@
 			unsigned int proto_off,
 			int inverse);
 
-	int (*snat_handler)(struct sk_buff **pskb,
+	int (*snat_handler)(struct sk_buff *skb,
 			    struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
 
-	int (*dnat_handler)(struct sk_buff **pskb,
+	int (*dnat_handler)(struct sk_buff *skb,
 			    struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
 
 	int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
@@ -654,11 +654,11 @@
 
 	/* output hook: return false if can't linearize. diff set for TCP.  */
 	int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
-		       struct sk_buff **, int *diff);
+		       struct sk_buff *, int *diff);
 
 	/* input hook: return false if can't linearize. diff set for TCP. */
 	int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
-		      struct sk_buff **, int *diff);
+		      struct sk_buff *, int *diff);
 
 	/* ip_vs_app initializer */
 	int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
@@ -832,8 +832,8 @@
 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
 
-extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb);
-extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb);
+extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
+extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
 extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 			     char *o_buf, int o_len, char *n_buf, int n_len);
 extern int ip_vs_app_init(void);
@@ -984,7 +984,6 @@
 	return fwd;
 }
 
-extern int ip_vs_make_skb_writable(struct sk_buff **pskb, int len);
 extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		struct ip_vs_conn *cp, int dir);
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 31b3f1b..cc796cb 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -120,12 +120,21 @@
 	SNMP_INC_STATS##modifier(statname##_statistics, (field));	\
 })
 
+#define _DEVADD(statname, modifier, idev, field, val)			\
+({									\
+	struct inet6_dev *_idev = (idev);				\
+	if (likely(_idev != NULL))					\
+		SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \
+	SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\
+})
+
 /* MIBs */
 DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 
 #define IP6_INC_STATS(idev,field)	_DEVINC(ipv6, , idev, field)
 #define IP6_INC_STATS_BH(idev,field)	_DEVINC(ipv6, _BH, idev, field)
 #define IP6_INC_STATS_USER(idev,field)	_DEVINC(ipv6, _USER, idev, field)
+#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val)
 
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
@@ -240,7 +249,7 @@
 					       void (*destructor)(struct sock *));
 
 
-extern int			ipv6_parse_hopopts(struct sk_buff **skbp);
+extern int			ipv6_parse_hopopts(struct sk_buff *skb);
 
 extern struct ipv6_txoptions *  ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt);
 extern struct ipv6_txoptions *	ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
@@ -252,8 +261,8 @@
 
 extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);
 
-extern int ip6_frag_nqueues;
-extern atomic_t ip6_frag_mem;
+int ip6_frag_nqueues(void);
+int ip6_frag_mem(void);
 
 #define IPV6_FRAG_TIMEOUT	(60*HZ)		/* 60 seconds */
 
@@ -565,10 +574,8 @@
 /*
  * reassembly.c
  */
-extern int sysctl_ip6frag_high_thresh;
-extern int sysctl_ip6frag_low_thresh;
-extern int sysctl_ip6frag_time;
-extern int sysctl_ip6frag_secret_interval;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl ip6_frags_ctl;
 
 extern const struct proto_ops inet6_stream_ops;
 extern const struct proto_ops inet6_dgram_ops;
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 070d12c..f703533 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -15,8 +15,7 @@
 			       struct net_device *out,
 			       int (*okfn)(struct sk_buff *));
 
-extern unsigned int nf_ct_frag6_timeout;
-extern unsigned int nf_ct_frag6_low_thresh;
-extern unsigned int nf_ct_frag6_high_thresh;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl nf_frags_ctl;
 
 #endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 4056f5f..a532e7b 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -22,7 +22,7 @@
    of connection tracking. */
 extern unsigned int nf_conntrack_in(int pf,
 				    unsigned int hooknum,
-				    struct sk_buff **pskb);
+				    struct sk_buff *skb);
 
 extern int nf_conntrack_init(void);
 extern void nf_conntrack_cleanup(void);
@@ -60,17 +60,17 @@
 extern struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple);
 
-extern int __nf_conntrack_confirm(struct sk_buff **pskb);
+extern int __nf_conntrack_confirm(struct sk_buff *skb);
 
 /* Confirm a connection: returns NF_DROP if packet must be dropped. */
-static inline int nf_conntrack_confirm(struct sk_buff **pskb)
+static inline int nf_conntrack_confirm(struct sk_buff *skb)
 {
-	struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct;
+	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	int ret = NF_ACCEPT;
 
 	if (ct) {
 		if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
-			ret = __nf_conntrack_confirm(pskb);
+			ret = __nf_conntrack_confirm(skb);
 		nf_ct_deliver_cached_events(ct);
 	}
 	return ret;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 0dcc4c8..d7b2d54 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -29,7 +29,7 @@
 
 	/* Function to call when data passes; return verdict, or -1 to
            invalidate. */
-	int (*help)(struct sk_buff **pskb,
+	int (*help)(struct sk_buff *skb,
 		    unsigned int protoff,
 		    struct nf_conn *ct,
 		    enum ip_conntrack_info conntrackinfo);
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index c3cd127..f29eeb9 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -10,12 +10,12 @@
 extern unsigned int nf_nat_packet(struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned int hooknum,
-				  struct sk_buff **pskb);
+				  struct sk_buff *skb);
 
 extern int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 					 enum ip_conntrack_info ctinfo,
 					 unsigned int hooknum,
-					 struct sk_buff **pskb);
+					 struct sk_buff *skb);
 
 static inline int nf_nat_initialized(struct nf_conn *ct,
 				     enum nf_nat_manip_type manip)
diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h
index ec98ecf..58dd226 100644
--- a/include/net/netfilter/nf_nat_helper.h
+++ b/include/net/netfilter/nf_nat_helper.h
@@ -7,21 +7,21 @@
 struct sk_buff;
 
 /* These return true or false. */
-extern int nf_nat_mangle_tcp_packet(struct sk_buff **skb,
+extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 				    struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
 				    unsigned int match_offset,
 				    unsigned int match_len,
 				    const char *rep_buffer,
 				    unsigned int rep_len);
-extern int nf_nat_mangle_udp_packet(struct sk_buff **skb,
+extern int nf_nat_mangle_udp_packet(struct sk_buff *skb,
 				    struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
 				    unsigned int match_offset,
 				    unsigned int match_len,
 				    const char *rep_buffer,
 				    unsigned int rep_len);
-extern int nf_nat_seq_adjust(struct sk_buff **pskb,
+extern int nf_nat_seq_adjust(struct sk_buff *skb,
 			     struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo);
 
diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index 14c7b2d..04578bf 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -18,7 +18,7 @@
 
 	/* Translate a packet to the target according to manip type.
 	   Return true if succeeded. */
-	int (*manip_pkt)(struct sk_buff **pskb,
+	int (*manip_pkt)(struct sk_buff *skb,
 			 unsigned int iphdroff,
 			 const struct nf_conntrack_tuple *tuple,
 			 enum nf_nat_manip_type maniptype);
diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index f974318..75d1825 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -6,7 +6,7 @@
 
 extern int nf_nat_rule_init(void) __init;
 extern void nf_nat_rule_cleanup(void);
-extern int nf_nat_rule_find(struct sk_buff **pskb,
+extern int nf_nat_rule_find(struct sk_buff *skb,
 			    unsigned int hooknum,
 			    const struct net_device *in,
 			    const struct net_device *out,
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 105bf12..1166ffb 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -45,7 +45,7 @@
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 struct inet6_protocol 
 {
-	int	(*handler)(struct sk_buff **skb);
+	int	(*handler)(struct sk_buff *skb);
 
 	void	(*err_handler)(struct sk_buff *skb,
 			       struct inet6_skb_parm *opt,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 77be396..0e84484 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1051,7 +1051,7 @@
 extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
 extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi);
-extern int xfrm6_rcv(struct sk_buff **pskb);
+extern int xfrm6_rcv(struct sk_buff *skb);
 extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 			    xfrm_address_t *saddr, u8 proto);
 extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 04f3ffb..0ae703c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1525,6 +1525,7 @@
 			context->names[idx].ino = (unsigned long)-1;
 	}
 }
+EXPORT_SYMBOL_GPL(__audit_inode_child);
 
 /**
  * auditsc_get_stamp - get local copies of audit_context values
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 734da57..a6f1ee9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1521,7 +1521,7 @@
 }
 
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-	       	struct held_lock *hlock, int chain_head)
+	       	struct held_lock *hlock, int chain_head, u64 chain_key)
 {
 	/*
 	 * Trylock needs to maintain the stack of held locks, but it
@@ -1534,7 +1534,7 @@
 	 * graph_lock for us)
 	 */
 	if (!hlock->trylock && (hlock->check == 2) &&
-			lookup_chain_cache(curr->curr_chain_key, hlock->class)) {
+			lookup_chain_cache(chain_key, hlock->class)) {
 		/*
 		 * Check whether last held lock:
 		 *
@@ -1576,7 +1576,7 @@
 #else
 static inline int validate_chain(struct task_struct *curr,
 	       	struct lockdep_map *lock, struct held_lock *hlock,
-		int chain_head)
+		int chain_head, u64 chain_key)
 {
 	return 1;
 }
@@ -2450,11 +2450,11 @@
 		chain_head = 1;
 	}
 	chain_key = iterate_chain_key(chain_key, id);
-	curr->curr_chain_key = chain_key;
 
-	if (!validate_chain(curr, lock, hlock, chain_head))
+	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
 
+	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
 #ifdef CONFIG_DEBUG_LOCKDEP
@@ -3199,3 +3199,19 @@
 }
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
+void lockdep_sys_exit(void)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(curr->lockdep_depth)) {
+		if (!debug_locks_off())
+			return;
+		printk("\n================================================\n");
+		printk(  "[ BUG: lock held when returning to user space! ]\n");
+		printk(  "------------------------------------------------\n");
+		printk("%s/%d is leaving the kernel with locks still held!\n",
+				curr->comm, curr->pid);
+		lockdep_print_held_locks(curr);
+	}
+}
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index c851b2d..8a135bd 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -25,28 +25,38 @@
 
 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct lock_class *class = v;
+	struct lock_class *class;
 
 	(*pos)++;
 
-	if (class->lock_entry.next != &all_lock_classes)
-		class = list_entry(class->lock_entry.next, struct lock_class,
-				  lock_entry);
-	else
-		class = NULL;
-	m->private = class;
+	if (v == SEQ_START_TOKEN)
+		class = m->private;
+	else {
+		class = v;
+
+		if (class->lock_entry.next != &all_lock_classes)
+			class = list_entry(class->lock_entry.next,
+					   struct lock_class, lock_entry);
+		else
+			class = NULL;
+	}
 
 	return class;
 }
 
 static void *l_start(struct seq_file *m, loff_t *pos)
 {
-	struct lock_class *class = m->private;
+	struct lock_class *class;
+	loff_t i = 0;
 
-	if (&class->lock_entry == all_lock_classes.next)
-		seq_printf(m, "all lock classes:\n");
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
 
-	return class;
+	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+		if (++i == *pos)
+		return class;
+	}
+	return NULL;
 }
 
 static void l_stop(struct seq_file *m, void *v)
@@ -101,10 +111,15 @@
 static int l_show(struct seq_file *m, void *v)
 {
 	unsigned long nr_forward_deps, nr_backward_deps;
-	struct lock_class *class = m->private;
+	struct lock_class *class = v;
 	struct lock_list *entry;
 	char c1, c2, c3, c4;
 
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(m, "all lock classes:\n");
+		return 0;
+	}
+
 	seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
 	seq_printf(m, " OPS:%8ld", class->ops);
@@ -523,10 +538,11 @@
 {
 	struct lock_stat_seq *data = m->private;
 
-	if (data->iter == data->stats)
-		seq_header(m);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
 
-	if (data->iter == data->iter_end)
+	data->iter = data->stats + *pos;
+	if (data->iter >= data->iter_end)
 		data->iter = NULL;
 
 	return data->iter;
@@ -538,8 +554,13 @@
 
 	(*pos)++;
 
-	data->iter = v;
-	data->iter++;
+	if (v == SEQ_START_TOKEN)
+		data->iter = data->stats;
+	else {
+		data->iter = v;
+		data->iter++;
+	}
+
 	if (data->iter == data->iter_end)
 		data->iter = NULL;
 
@@ -552,9 +573,11 @@
 
 static int ls_show(struct seq_file *m, void *v)
 {
-	struct lock_stat_seq *data = m->private;
+	if (v == SEQ_START_TOKEN)
+		seq_header(m);
+	else
+		seq_stats(m, v);
 
-	seq_stats(m, data->iter);
 	return 0;
 }
 
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 691b865..d7fe50c 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -51,6 +51,7 @@
 
 EXPORT_SYMBOL(__mutex_init);
 
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
  * slowpath functions, to reduce the register pressure on the fastpath.
@@ -92,6 +93,7 @@
 }
 
 EXPORT_SYMBOL(mutex_lock);
+#endif
 
 static void fastcall noinline __sched
 __mutex_unlock_slowpath(atomic_t *lock_count);
@@ -122,7 +124,8 @@
  * Lock a mutex (possibly interruptible), slowpath:
  */
 static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+	       	unsigned long ip)
 {
 	struct task_struct *task = current;
 	struct mutex_waiter waiter;
@@ -132,7 +135,7 @@
 	spin_lock_mutex(&lock->wait_lock, flags);
 
 	debug_mutex_lock_common(lock, &waiter);
-	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	mutex_acquire(&lock->dep_map, subclass, 0, ip);
 	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -143,7 +146,7 @@
 	if (old_val == 1)
 		goto done;
 
-	lock_contended(&lock->dep_map, _RET_IP_);
+	lock_contended(&lock->dep_map, ip);
 
 	for (;;) {
 		/*
@@ -166,7 +169,7 @@
 		if (unlikely(state == TASK_INTERRUPTIBLE &&
 						signal_pending(task))) {
 			mutex_remove_waiter(lock, &waiter, task_thread_info(task));
-			mutex_release(&lock->dep_map, 1, _RET_IP_);
+			mutex_release(&lock->dep_map, 1, ip);
 			spin_unlock_mutex(&lock->wait_lock, flags);
 
 			debug_mutex_free_waiter(&waiter);
@@ -197,20 +200,12 @@
 	return 0;
 }
 
-static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
-{
-	struct mutex *lock = container_of(lock_count, struct mutex, count);
-
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
-}
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -219,7 +214,7 @@
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -271,6 +266,7 @@
 	__mutex_unlock_common_slowpath(lock_count, 1);
 }
 
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Here come the less common (and hence less performance-critical) APIs:
  * mutex_lock_interruptible() and mutex_trylock().
@@ -298,13 +294,22 @@
 
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count)
+{
+	struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+}
+
 static int fastcall noinline __sched
 __mutex_lock_interruptible_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
 }
+#endif
 
 /*
  * Spinlock based trylock, we take the spinlock and check whether we
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c2dd84..130214f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,6 +49,14 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
 /* Definition for rcupdate control block. */
 static struct rcu_ctrlblk rcu_ctrlblk = {
 	.cur = -300,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 848b8fa..93867bb 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -23,7 +23,7 @@
 
 #include "br_private.h"
 
-int (*br_should_route_hook) (struct sk_buff **pskb) = NULL;
+int (*br_should_route_hook)(struct sk_buff *skb);
 
 static struct llc_sap *br_stp_sap;
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3a8a015..3cedd4e 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -126,6 +126,10 @@
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
@@ -145,7 +149,7 @@
 	case BR_STATE_FORWARDING:
 
 		if (br_should_route_hook) {
-			if (br_should_route_hook(&skb))
+			if (br_should_route_hook(skb))
 				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8245f05..da22f90 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -503,18 +503,14 @@
  * receiving device) to make netfilter happy, the REDIRECT
  * target in particular.  Save the original destination IP
  * address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
 	struct iphdr *iph;
-	struct sk_buff *skb = *pskb;
 	__u32 len = nf_bridge_encap_header_len(skb);
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
-		return NF_STOLEN;
-
 	if (unlikely(!pskb_may_pull(skb, len)))
 		goto out;
 
@@ -584,13 +580,11 @@
  * took place when the packet entered the bridge), but we
  * register an IPv4 PRE_ROUTING 'sabotage' hook that will
  * prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
-
 	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
 		dst_release(skb->dst);
 		skb->dst = NULL;
@@ -625,12 +619,11 @@
  * but we are still able to filter on the 'real' indev/outdev
  * because of the physdev module. For ARP, indev and outdev are the
  * bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 				     const struct net_device *in,
 				     const struct net_device *out,
 				     int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
 	struct nf_bridge_info *nf_bridge;
 	struct net_device *parent;
 	int pf;
@@ -648,7 +641,7 @@
 	else
 		pf = PF_INET6;
 
-	nf_bridge_pull_encap_header(*pskb);
+	nf_bridge_pull_encap_header(skb);
 
 	nf_bridge = skb->nf_bridge;
 	if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -666,12 +659,11 @@
 	return NF_STOLEN;
 }
 
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
 	struct net_device **d = (struct net_device **)(skb->cb);
 
 #ifdef CONFIG_SYSCTL
@@ -682,12 +674,12 @@
 	if (skb->protocol != htons(ETH_P_ARP)) {
 		if (!IS_VLAN_ARP(skb))
 			return NF_ACCEPT;
-		nf_bridge_pull_encap_header(*pskb);
+		nf_bridge_pull_encap_header(skb);
 	}
 
 	if (arp_hdr(skb)->ar_pln != 4) {
 		if (IS_VLAN_ARP(skb))
-			nf_bridge_push_encap_header(*pskb);
+			nf_bridge_push_encap_header(skb);
 		return NF_ACCEPT;
 	}
 	*d = (struct net_device *)in;
@@ -709,13 +701,12 @@
  * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
  * will be executed.
  */
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    int (*okfn)(struct sk_buff *))
 {
 	struct net_device *realindev;
-	struct sk_buff *skb = *pskb;
 	struct nf_bridge_info *nf_bridge;
 
 	if (!skb->nf_bridge)
@@ -752,13 +743,12 @@
 }
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
-	struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	int pf;
 
@@ -828,13 +818,13 @@
 /* IP/SABOTAGE *****************************************************/
 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
  * for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	if ((*pskb)->nf_bridge &&
-	    !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+	if (skb->nf_bridge &&
+	    !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
 		return NF_STOP;
 	}
 
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index ffe468a..48a80e4 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -15,7 +15,7 @@
 #include <net/arp.h>
 #include <linux/module.h>
 
-static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
@@ -23,7 +23,6 @@
 	__be32 _sip, *siptr, _dip, *diptr;
 	struct arphdr _ah, *ap;
 	unsigned char _sha[ETH_ALEN], *shp;
-	struct sk_buff *skb = *pskb;
 
 	ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
 	if (ap == NULL)
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4582659..74262e9 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -8,29 +8,22 @@
  *
  */
 
+#include <linux/netfilter.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 #include <linux/module.h>
 #include <net/sock.h>
 
-static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	struct ebt_nat_info *info = (struct ebt_nat_info *)data;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (skb_make_writable(skb, 0))
+		return NF_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-	memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN);
 	return info->target;
 }
 
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 62d23c7..6cba543 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 #include <linux/module.h>
 
-static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
@@ -25,13 +25,13 @@
 	int action = info->target & -16;
 
 	if (action == MARK_SET_VALUE)
-		(*pskb)->mark = info->mark;
+		skb->mark = info->mark;
 	else if (action == MARK_OR_VALUE)
-		(*pskb)->mark |= info->mark;
+		skb->mark |= info->mark;
 	else if (action == MARK_AND_VALUE)
-		(*pskb)->mark &= info->mark;
+		skb->mark &= info->mark;
 	else
-		(*pskb)->mark ^= info->mark;
+		skb->mark ^= info->mark;
 
 	return info->target | ~EBT_VERDICT_BITS;
 }
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9f378ea..422cb83 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -8,35 +8,28 @@
  *
  */
 
+#include <linux/netfilter.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_redirect.h>
 #include <linux/module.h>
 #include <net/sock.h>
 #include "../br_private.h"
 
-static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	struct ebt_redirect_info *info = (struct ebt_redirect_info *)data;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (skb_make_writable(skb, 0))
+		return NF_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
 	if (hooknr != NF_BR_BROUTING)
-		memcpy(eth_hdr(*pskb)->h_dest,
+		memcpy(eth_hdr(skb)->h_dest,
 		       in->br_port->br->dev->dev_addr, ETH_ALEN);
 	else
-		memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN);
-	(*pskb)->pkt_type = PACKET_HOST;
+		memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN);
+	skb->pkt_type = PACKET_HOST;
 	return info->target;
 }
 
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index a507221..425ac92 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/netfilter.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 #include <linux/module.h>
@@ -15,34 +16,26 @@
 #include <linux/if_arp.h>
 #include <net/arp.h>
 
-static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	struct ebt_nat_info *info = (struct ebt_nat_info *) data;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (skb_make_writable(skb, 0))
+		return NF_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-	memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
 	if (!(info->target & NAT_ARP_BIT) &&
-	    eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) {
+	    eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
 		struct arphdr _ah, *ap;
 
-		ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah);
+		ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
 		if (ap == NULL)
 			return EBT_DROP;
 		if (ap->ar_hln != ETH_ALEN)
 			goto out;
-		if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN))
+		if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN))
 			return EBT_DROP;
 	}
 out:
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d37ce04..e44519e 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -51,11 +51,11 @@
 	.me		= THIS_MODULE,
 };
 
-static int ebt_broute(struct sk_buff **pskb)
+static int ebt_broute(struct sk_buff *skb)
 {
 	int ret;
 
-	ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL,
+	ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
 	   &broute_table);
 	if (ret == NF_DROP)
 		return 1; /* route it */
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 81d8414..210493f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -61,10 +61,10 @@
 };
 
 static unsigned int
-ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, pskb, in, out, &frame_filter);
+	return ebt_do_table(hook, skb, in, out, &frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 9c50488..3e58c2e 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -61,17 +61,17 @@
 };
 
 static unsigned int
-ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+	return ebt_do_table(hook, skb, in, out, &frame_nat);
 }
 
 static unsigned int
-ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+	return ebt_do_table(hook, skb, in, out, &frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6018d0e..d5a09ea 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -142,7 +142,7 @@
 }
 
 /* Do some firewalling */
-unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
+unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
    struct ebt_table *table)
 {
@@ -172,19 +172,19 @@
 	base = private->entries;
 	i = 0;
 	while (i < nentries) {
-		if (ebt_basic_match(point, eth_hdr(*pskb), in, out))
+		if (ebt_basic_match(point, eth_hdr(skb), in, out))
 			goto letscontinue;
 
-		if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0)
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0)
 			goto letscontinue;
 
 		/* increase counter */
 		(*(counter_base + i)).pcnt++;
-		(*(counter_base + i)).bcnt+=(**pskb).len;
+		(*(counter_base + i)).bcnt += skb->len;
 
 		/* these should only watch: not modify, nor tell us
 		   what to do with the packet */
-		EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in,
+		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in,
 		   out);
 
 		t = (struct ebt_entry_target *)
@@ -193,7 +193,7 @@
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
 		else
-			verdict = t->u.target->target(pskb, hook,
+			verdict = t->u.target->target(skb, hook,
 			   in, out, t->data, t->target_size);
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
diff --git a/net/core/dev.c b/net/core/dev.c
index 99b7bda..38b03da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1362,22 +1362,21 @@
 		goto out_set_summed;
 	}
 
-	if (skb_cloned(skb)) {
+	offset = skb->csum_start - skb_headroom(skb);
+	BUG_ON(offset >= skb_headlen(skb));
+	csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+	offset += skb->csum_offset;
+	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+
+	if (skb_cloned(skb) &&
+	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 		if (ret)
 			goto out;
 	}
 
-	offset = skb->csum_start - skb_headroom(skb);
-	BUG_ON(offset > (int)skb->len);
-	csum = skb_checksum(skb, offset, skb->len-offset, 0);
-
-	offset = skb_headlen(skb) - offset;
-	BUG_ON(offset <= 0);
-	BUG_ON(skb->csum_offset + 2 > offset);
-
-	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
-		csum_fold(csum);
+	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
 out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:
@@ -1949,28 +1948,52 @@
 	struct Qdisc *q;
 	struct net_device *dev = skb->dev;
 	int result = TC_ACT_OK;
+	u32 ttl = G_TC_RTTL(skb->tc_verd);
 
-	if (dev->qdisc_ingress) {
-		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
-		if (MAX_RED_LOOP < ttl++) {
-			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
-				skb->iif, skb->dev->ifindex);
-			return TC_ACT_SHOT;
-		}
-
-		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
-
-		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
-
-		spin_lock(&dev->ingress_lock);
-		if ((q = dev->qdisc_ingress) != NULL)
-			result = q->enqueue(skb, q);
-		spin_unlock(&dev->ingress_lock);
-
+	if (MAX_RED_LOOP < ttl++) {
+		printk(KERN_WARNING
+		       "Redir loop detected Dropping packet (%d->%d)\n",
+		       skb->iif, dev->ifindex);
+		return TC_ACT_SHOT;
 	}
 
+	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+
+	spin_lock(&dev->ingress_lock);
+	if ((q = dev->qdisc_ingress) != NULL)
+		result = q->enqueue(skb, q);
+	spin_unlock(&dev->ingress_lock);
+
 	return result;
 }
+
+static inline struct sk_buff *handle_ing(struct sk_buff *skb,
+					 struct packet_type **pt_prev,
+					 int *ret, struct net_device *orig_dev)
+{
+	if (!skb->dev->qdisc_ingress)
+		goto out;
+
+	if (*pt_prev) {
+		*ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*pt_prev = NULL;
+	} else {
+		/* Huh? Why does turning on AF_PACKET affect this? */
+		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+	}
+
+	switch (ing_filter(skb)) {
+	case TC_ACT_SHOT:
+	case TC_ACT_STOLEN:
+		kfree_skb(skb);
+		return NULL;
+	}
+
+out:
+	skb->tc_verd = 0;
+	return skb;
+}
 #endif
 
 int netif_receive_skb(struct sk_buff *skb)
@@ -2021,21 +2044,9 @@
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
-	if (pt_prev) {
-		ret = deliver_skb(skb, pt_prev, orig_dev);
-		pt_prev = NULL; /* noone else should process this after*/
-	} else {
-		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
-	}
-
-	ret = ing_filter(skb);
-
-	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
-		kfree_skb(skb);
+	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+	if (!skb)
 		goto out;
-	}
-
-	skb->tc_verd = 0;
 ncls:
 #endif
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c52df85..cd3af59 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -481,6 +481,8 @@
 	if (!creat)
 		goto out;
 
+	ASSERT_RTNL();
+
 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
 	if (!n)
 		goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 944189d..70d9b5d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -362,6 +362,97 @@
 	__kfree_skb(skb);
 }
 
+static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	new->tstamp		= old->tstamp;
+	new->dev		= old->dev;
+	new->transport_header	= old->transport_header;
+	new->network_header	= old->network_header;
+	new->mac_header		= old->mac_header;
+	new->dst		= dst_clone(old->dst);
+#ifdef CONFIG_INET
+	new->sp			= secpath_get(old->sp);
+#endif
+	memcpy(new->cb, old->cb, sizeof(old->cb));
+	new->csum_start		= old->csum_start;
+	new->csum_offset	= old->csum_offset;
+	new->local_df		= old->local_df;
+	new->pkt_type		= old->pkt_type;
+	new->ip_summed		= old->ip_summed;
+	skb_copy_queue_mapping(new, old);
+	new->priority		= old->priority;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	new->ipvs_property	= old->ipvs_property;
+#endif
+	new->protocol		= old->protocol;
+	new->mark		= old->mark;
+	__nf_copy(new, old);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	new->nf_trace		= old->nf_trace;
+#endif
+#ifdef CONFIG_NET_SCHED
+	new->tc_index		= old->tc_index;
+#ifdef CONFIG_NET_CLS_ACT
+	new->tc_verd		= old->tc_verd;
+#endif
+#endif
+	skb_copy_secmark(new, old);
+}
+
+static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
+{
+#define C(x) n->x = skb->x
+
+	n->next = n->prev = NULL;
+	n->sk = NULL;
+	__copy_skb_header(n, skb);
+
+	C(len);
+	C(data_len);
+	C(mac_len);
+	n->cloned = 1;
+	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+	n->nohdr = 0;
+	n->destructor = NULL;
+#ifdef CONFIG_NET_CLS_ACT
+	/* FIXME What is this and why don't we do it in copy_skb_header? */
+	n->tc_verd = SET_TC_VERD(n->tc_verd,0);
+	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
+	C(iif);
+#endif
+	C(truesize);
+	atomic_set(&n->users, 1);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
+
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+
+	return n;
+#undef C
+}
+
+/**
+ *	skb_morph	-	morph one skb into another
+ *	@dst: the skb to receive the contents
+ *	@src: the skb to supply the contents
+ *
+ *	This is identical to skb_clone except that the target skb is
+ *	supplied by the user.
+ *
+ *	The target skb is returned upon exit.
+ */
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
+{
+	skb_release_data(dst);
+	return __skb_clone(dst, src);
+}
+EXPORT_SYMBOL_GPL(skb_morph);
+
 /**
  *	skb_clone	-	duplicate an sk_buff
  *	@skb: buffer to clone
@@ -393,66 +484,7 @@
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
-#define C(x) n->x = skb->x
-
-	n->next = n->prev = NULL;
-	n->sk = NULL;
-	C(tstamp);
-	C(dev);
-	C(transport_header);
-	C(network_header);
-	C(mac_header);
-	C(dst);
-	dst_clone(skb->dst);
-	C(sp);
-#ifdef CONFIG_INET
-	secpath_get(skb->sp);
-#endif
-	memcpy(n->cb, skb->cb, sizeof(skb->cb));
-	C(len);
-	C(data_len);
-	C(mac_len);
-	C(csum);
-	C(local_df);
-	n->cloned = 1;
-	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
-	n->nohdr = 0;
-	C(pkt_type);
-	C(ip_summed);
-	skb_copy_queue_mapping(n, skb);
-	C(priority);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-	C(ipvs_property);
-#endif
-	C(protocol);
-	n->destructor = NULL;
-	C(mark);
-	__nf_copy(n, skb);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-	C(nf_trace);
-#endif
-#ifdef CONFIG_NET_SCHED
-	C(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
-	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
-	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
-	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
-	C(iif);
-#endif
-#endif
-	skb_copy_secmark(n, skb);
-	C(truesize);
-	atomic_set(&n->users, 1);
-	C(head);
-	C(data);
-	C(tail);
-	C(end);
-
-	atomic_inc(&(skb_shinfo(skb)->dataref));
-	skb->cloned = 1;
-
-	return n;
+	return __skb_clone(n, skb);
 }
 
 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
@@ -463,50 +495,15 @@
 	 */
 	unsigned long offset = new->data - old->data;
 #endif
-	new->sk		= NULL;
-	new->dev	= old->dev;
-	skb_copy_queue_mapping(new, old);
-	new->priority	= old->priority;
-	new->protocol	= old->protocol;
-	new->dst	= dst_clone(old->dst);
-#ifdef CONFIG_INET
-	new->sp		= secpath_get(old->sp);
-#endif
-	new->csum_start = old->csum_start;
-	new->csum_offset = old->csum_offset;
-	new->ip_summed = old->ip_summed;
-	new->transport_header = old->transport_header;
-	new->network_header   = old->network_header;
-	new->mac_header	      = old->mac_header;
+
+	__copy_skb_header(new, old);
+
 #ifndef NET_SKBUFF_DATA_USES_OFFSET
 	/* {transport,network,mac}_header are relative to skb->head */
 	new->transport_header += offset;
 	new->network_header   += offset;
 	new->mac_header	      += offset;
 #endif
-	memcpy(new->cb, old->cb, sizeof(old->cb));
-	new->local_df	= old->local_df;
-	new->fclone	= SKB_FCLONE_UNAVAILABLE;
-	new->pkt_type	= old->pkt_type;
-	new->tstamp	= old->tstamp;
-	new->destructor = NULL;
-	new->mark	= old->mark;
-	__nf_copy(new, old);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-	new->nf_trace	= old->nf_trace;
-#endif
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-	new->ipvs_property = old->ipvs_property;
-#endif
-#ifdef CONFIG_NET_SCHED
-#ifdef CONFIG_NET_CLS_ACT
-	new->tc_verd = old->tc_verd;
-#endif
-	new->tc_index	= old->tc_index;
-#endif
-	skb_copy_secmark(new, old);
-	atomic_set(&new->users, 1);
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -685,7 +682,7 @@
 	skb->transport_header += off;
 	skb->network_header   += off;
 	skb->mac_header	      += off;
-	skb->csum_start       += off;
+	skb->csum_start       += nhead;
 	skb->cloned   = 0;
 	skb->hdr_len  = 0;
 	skb->nohdr    = 0;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 006a383..cac5354 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -767,10 +767,9 @@
 	return 0;
 }
 
-static int dccp_v6_rcv(struct sk_buff **pskb)
+static int dccp_v6_rcv(struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh;
-	struct sk_buff *skb = *pskb;
 	struct sock *sk;
 	int min_cov;
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index f7fba77..43fcd29 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -88,12 +88,12 @@
 
 
 static unsigned int dnrmg_hook(unsigned int hook,
-			struct sk_buff **pskb,
+			struct sk_buff *skb,
 			const struct net_device *in,
 			const struct net_device *out,
 			int (*okfn)(struct sk_buff *))
 {
-	dnrmg_send_peer(*pskb);
+	dnrmg_send_peer(skb);
 	return NF_ACCEPT;
 }
 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d..93fe396 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+	     inet_fragment.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 0000000..484cf51
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,174 @@
+/*
+ * inet fragments management
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * 		Authors:	Pavel Emelyanov <xemul@openvz.org>
+ *				Started as consolidation of ipv4/ip_fragment.c,
+ *				ipv6/reassembly. and ipv6 nf conntrack reassembly
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/inet_frag.h>
+
+static void inet_frag_secret_rebuild(unsigned long dummy)
+{
+	struct inet_frags *f = (struct inet_frags *)dummy;
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&f->lock);
+	get_random_bytes(&f->rnd, sizeof(u32));
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_queue *q;
+		struct hlist_node *p, *n;
+
+		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+			unsigned int hval = f->hashfn(q);
+
+			if (hval != i) {
+				hlist_del(&q->list);
+
+				/* Relink to new hash chain. */
+				hlist_add_head(&q->list, &f->hash[hval]);
+			}
+		}
+	}
+	write_unlock(&f->lock);
+
+	mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+}
+
+void inet_frags_init(struct inet_frags *f)
+{
+	int i;
+
+	for (i = 0; i < INETFRAGS_HASHSZ; i++)
+		INIT_HLIST_HEAD(&f->hash[i]);
+
+	INIT_LIST_HEAD(&f->lru_list);
+	rwlock_init(&f->lock);
+
+	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				   (jiffies ^ (jiffies >> 6)));
+
+	f->nqueues = 0;
+	atomic_set(&f->mem, 0);
+
+	init_timer(&f->secret_timer);
+	f->secret_timer.function = inet_frag_secret_rebuild;
+	f->secret_timer.data = (unsigned long)f;
+	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+	add_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_init);
+
+void inet_frags_fini(struct inet_frags *f)
+{
+	del_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_fini);
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	write_lock(&f->lock);
+	hlist_del(&fq->list);
+	list_del(&fq->lru_list);
+	f->nqueues--;
+	write_unlock(&f->lock);
+}
+
+void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	if (del_timer(&fq->timer))
+		atomic_dec(&fq->refcnt);
+
+	if (!(fq->last_in & COMPLETE)) {
+		fq_unlink(fq, f);
+		atomic_dec(&fq->refcnt);
+		fq->last_in |= COMPLETE;
+	}
+}
+
+EXPORT_SYMBOL(inet_frag_kill);
+
+static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
+						int *work)
+{
+	if (work)
+		*work -= skb->truesize;
+
+	atomic_sub(skb->truesize, &f->mem);
+	if (f->skb_free)
+		f->skb_free(skb);
+	kfree_skb(skb);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
+					int *work)
+{
+	struct sk_buff *fp;
+
+	BUG_TRAP(q->last_in & COMPLETE);
+	BUG_TRAP(del_timer(&q->timer) == 0);
+
+	/* Release all fragment data. */
+	fp = q->fragments;
+	while (fp) {
+		struct sk_buff *xp = fp->next;
+
+		frag_kfree_skb(f, fp, work);
+		fp = xp;
+	}
+
+	if (work)
+		*work -= f->qsize;
+	atomic_sub(f->qsize, &f->mem);
+
+	f->destructor(q);
+
+}
+EXPORT_SYMBOL(inet_frag_destroy);
+
+int inet_frag_evictor(struct inet_frags *f)
+{
+	struct inet_frag_queue *q;
+	int work, evicted = 0;
+
+	work = atomic_read(&f->mem) - f->ctl->low_thresh;
+	while (work > 0) {
+		read_lock(&f->lock);
+		if (list_empty(&f->lru_list)) {
+			read_unlock(&f->lock);
+			break;
+		}
+
+		q = list_first_entry(&f->lru_list,
+				struct inet_frag_queue, lru_list);
+		atomic_inc(&q->refcnt);
+		read_unlock(&f->lock);
+
+		spin_lock(&q->lock);
+		if (!(q->last_in & COMPLETE))
+			inet_frag_kill(q, f);
+		spin_unlock(&q->lock);
+
+		if (atomic_dec_and_test(&q->refcnt))
+			inet_frag_destroy(q, f, &work);
+		evicted++;
+	}
+
+	return evicted;
+}
+EXPORT_SYMBOL(inet_frag_evictor);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index afbf938..877da3e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -40,7 +40,7 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 
-static inline int ip_forward_finish(struct sk_buff *skb)
+static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fabb86d..443b3f8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -39,6 +39,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/inet_frag.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -49,21 +50,8 @@
  * as well. Or notify me, at least. --ANK
  */
 
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
 int sysctl_ipfrag_max_dist __read_mostly = 64;
 
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
 struct ipfrag_skb_cb
 {
 	struct inet_skb_parm	h;
@@ -74,153 +62,102 @@
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
-	struct hlist_node list;
-	struct list_head lru_list;	/* lru list member 			*/
+	struct inet_frag_queue q;
+
 	u32		user;
 	__be32		saddr;
 	__be32		daddr;
 	__be16		id;
 	u8		protocol;
-	u8		last_in;
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
-
-	struct sk_buff	*fragments;	/* linked list of received fragments	*/
-	int		len;		/* total length of original datagram	*/
-	int		meat;
-	spinlock_t	lock;
-	atomic_t	refcnt;
-	struct timer_list timer;	/* when will this queue expire?		*/
-	ktime_t		stamp;
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
 };
 
-/* Hash table. */
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
 
-#define IPQ_HASHSZ	64
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
+	 * by TTL.
+	 */
+	.timeout	 = IP_FRAG_TIME,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+static struct inet_frags ip4_frags;
 
-static __inline__ void __ipq_unlink(struct ipq *qp)
+int ip_frag_nqueues(void)
 {
-	hlist_del(&qp->list);
-	list_del(&qp->lru_list);
-	ip_frag_nqueues--;
+	return ip4_frags.nqueues;
 }
 
-static __inline__ void ipq_unlink(struct ipq *ipq)
+int ip_frag_mem(void)
 {
-	write_lock(&ipfrag_lock);
-	__ipq_unlink(ipq);
-	write_unlock(&ipfrag_lock);
+	return atomic_read(&ip4_frags.mem);
 }
 
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+			 struct net_device *dev);
+
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
 			    (__force u32)saddr, (__force u32)daddr,
-			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
+	struct ipq *ipq;
 
-	write_lock(&ipfrag_lock);
-	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IPQ_HASHSZ; i++) {
-		struct ipq *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
-			unsigned int hval = ipqhashfn(q->id, q->saddr,
-						      q->daddr, q->protocol);
-
-			if (hval != i) {
-				hlist_del(&q->list);
-
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &ipq_hash[hval]);
-			}
-		}
-	}
-	write_unlock(&ipfrag_lock);
-
-	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+	ipq = container_of(q, struct ipq, q);
+	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
-
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip_frag_mem);
+	atomic_sub(skb->truesize, &ip4_frags.mem);
 	kfree_skb(skb);
 }
 
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct ipq);
-	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+	struct ipq *qp;
+
+	qp = container_of(q, struct ipq, q);
+	if (qp->peer)
+		inet_putpeer(qp->peer);
 	kfree(qp);
 }
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
 
 	if (!qp)
 		return NULL;
-	atomic_add(sizeof(struct ipq), &ip_frag_mem);
+	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
 	return qp;
 }
 
 
 /* Destruction primitives. */
 
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
 {
-	struct sk_buff *fp;
-
-	BUG_TRAP(qp->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&qp->timer) == 0);
-
-	if (qp->peer)
-		inet_putpeer(qp->peer);
-
-	/* Release all fragment data. */
-	fp = qp->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	/* Finally, release the queue descriptor itself. */
-	frag_free_queue(qp, work);
-}
-
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
-{
-	if (atomic_dec_and_test(&ipq->refcnt))
-		ip_frag_destroy(ipq, work);
+	inet_frag_put(&ipq->q, &ip4_frags);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -228,14 +165,7 @@
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	if (del_timer(&ipq->timer))
-		atomic_dec(&ipq->refcnt);
-
-	if (!(ipq->last_in & COMPLETE)) {
-		ipq_unlink(ipq);
-		atomic_dec(&ipq->refcnt);
-		ipq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
 /* Memory limiting on fragments.  Evictor trashes the oldest
@@ -243,33 +173,11 @@
  */
 static void ip_evictor(void)
 {
-	struct ipq *qp;
-	struct list_head *tmp;
-	int work;
+	int evicted;
 
-	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while (work > 0) {
-		read_lock(&ipfrag_lock);
-		if (list_empty(&ipq_lru_list)) {
-			read_unlock(&ipfrag_lock);
-			return;
-		}
-		tmp = ipq_lru_list.next;
-		qp = list_entry(tmp, struct ipq, lru_list);
-		atomic_inc(&qp->refcnt);
-		read_unlock(&ipfrag_lock);
-
-		spin_lock(&qp->lock);
-		if (!(qp->last_in&COMPLETE))
-			ipq_kill(qp);
-		spin_unlock(&qp->lock);
-
-		ipq_put(qp, &work);
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip4_frags);
+	if (evicted)
+		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
@@ -279,9 +187,9 @@
 {
 	struct ipq *qp = (struct ipq *) arg;
 
-	spin_lock(&qp->lock);
+	spin_lock(&qp->q.lock);
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto out;
 
 	ipq_kill(qp);
@@ -289,8 +197,8 @@
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 
-	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
-		struct sk_buff *head = qp->fragments;
+	if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+		struct sk_buff *head = qp->q.fragments;
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -298,8 +206,8 @@
 		}
 	}
 out:
-	spin_unlock(&qp->lock);
-	ipq_put(qp, NULL);
+	spin_unlock(&qp->q.lock);
+	ipq_put(qp);
 }
 
 /* Creation primitives. */
@@ -312,7 +220,7 @@
 #endif
 	unsigned int hash;
 
-	write_lock(&ipfrag_lock);
+	write_lock(&ip4_frags.lock);
 	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
 			 qp_in->protocol);
 #ifdef CONFIG_SMP
@@ -320,31 +228,31 @@
 	 * such entry could be created on other cpu, while we
 	 * promoted read lock to write lock.
 	 */
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == qp_in->id		&&
 		    qp->saddr == qp_in->saddr	&&
 		    qp->daddr == qp_in->daddr	&&
 		    qp->protocol == qp_in->protocol &&
 		    qp->user == qp_in->user) {
-			atomic_inc(&qp->refcnt);
-			write_unlock(&ipfrag_lock);
-			qp_in->last_in |= COMPLETE;
-			ipq_put(qp_in, NULL);
+			atomic_inc(&qp->q.refcnt);
+			write_unlock(&ip4_frags.lock);
+			qp_in->q.last_in |= COMPLETE;
+			ipq_put(qp_in);
 			return qp;
 		}
 	}
 #endif
 	qp = qp_in;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
+		atomic_inc(&qp->q.refcnt);
 
-	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &ipq_hash[hash]);
-	INIT_LIST_HEAD(&qp->lru_list);
-	list_add_tail(&qp->lru_list, &ipq_lru_list);
-	ip_frag_nqueues++;
-	write_unlock(&ipfrag_lock);
+	atomic_inc(&qp->q.refcnt);
+	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
+	INIT_LIST_HEAD(&qp->q.lru_list);
+	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	ip4_frags.nqueues++;
+	write_unlock(&ip4_frags.lock);
 	return qp;
 }
 
@@ -357,23 +265,18 @@
 		goto out_nomem;
 
 	qp->protocol = iph->protocol;
-	qp->last_in = 0;
 	qp->id = iph->id;
 	qp->saddr = iph->saddr;
 	qp->daddr = iph->daddr;
 	qp->user = user;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
-	qp->iif = 0;
 	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
-	init_timer(&qp->timer);
-	qp->timer.data = (unsigned long) qp;	/* pointer to queue	*/
-	qp->timer.function = ip_expire;		/* expire function	*/
-	spin_lock_init(&qp->lock);
-	atomic_set(&qp->refcnt, 1);
+	init_timer(&qp->q.timer);
+	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue	*/
+	qp->q.timer.function = ip_expire;		/* expire function	*/
+	spin_lock_init(&qp->q.lock);
+	atomic_set(&qp->q.refcnt, 1);
 
 	return ip_frag_intern(qp);
 
@@ -395,20 +298,20 @@
 	struct ipq *qp;
 	struct hlist_node *n;
 
-	read_lock(&ipfrag_lock);
+	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == id		&&
 		    qp->saddr == saddr	&&
 		    qp->daddr == daddr	&&
 		    qp->protocol == protocol &&
 		    qp->user == user) {
-			atomic_inc(&qp->refcnt);
-			read_unlock(&ipfrag_lock);
+			atomic_inc(&qp->q.refcnt);
+			read_unlock(&ip4_frags.lock);
 			return qp;
 		}
 	}
-	read_unlock(&ipfrag_lock);
+	read_unlock(&ip4_frags.lock);
 
 	return ip_frag_create(iph, user);
 }
@@ -429,7 +332,7 @@
 	end = atomic_inc_return(&peer->rid);
 	qp->rid = end;
 
-	rc = qp->fragments && (end - start) > max;
+	rc = qp->q.fragments && (end - start) > max;
 
 	if (rc) {
 		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -442,39 +345,42 @@
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
 
-	fp = qp->fragments;
+	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
 		frag_kfree_skb(fp, NULL);
 		fp = xp;
 	} while (fp);
 
-	qp->last_in = 0;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
+	qp->q.last_in = 0;
+	qp->q.len = 0;
+	qp->q.meat = 0;
+	qp->q.fragments = NULL;
 	qp->iif = 0;
 
 	return 0;
 }
 
 /* Add new segment to existing queue. */
-static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct sk_buff *prev, *next;
+	struct net_device *dev;
 	int flags, offset;
 	int ihl, end;
+	int err = -ENOENT;
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto err;
 
 	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
-	    unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+	    unlikely(ip_frag_too_far(qp)) &&
+	    unlikely(err = ip_frag_reinit(qp))) {
 		ipq_kill(qp);
 		goto err;
 	}
@@ -487,36 +393,40 @@
 
 	/* Determine the position of this fragment. */
 	end = offset + skb->len - ihl;
+	err = -EINVAL;
 
 	/* Is this the final fragment? */
 	if ((flags & IP_MF) == 0) {
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrrupted.
 		 */
-		if (end < qp->len ||
-		    ((qp->last_in & LAST_IN) && end != qp->len))
+		if (end < qp->q.len ||
+		    ((qp->q.last_in & LAST_IN) && end != qp->q.len))
 			goto err;
-		qp->last_in |= LAST_IN;
-		qp->len = end;
+		qp->q.last_in |= LAST_IN;
+		qp->q.len = end;
 	} else {
 		if (end&7) {
 			end &= ~7;
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 				skb->ip_summed = CHECKSUM_NONE;
 		}
-		if (end > qp->len) {
+		if (end > qp->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (qp->last_in & LAST_IN)
+			if (qp->q.last_in & LAST_IN)
 				goto err;
-			qp->len = end;
+			qp->q.len = end;
 		}
 	}
 	if (end == offset)
 		goto err;
 
+	err = -ENOMEM;
 	if (pskb_pull(skb, ihl) == NULL)
 		goto err;
-	if (pskb_trim_rcsum(skb, end-offset))
+
+	err = pskb_trim_rcsum(skb, end - offset);
+	if (err)
 		goto err;
 
 	/* Find out which fragments are in front and at the back of us
@@ -524,7 +434,7 @@
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for (next = qp->fragments; next != NULL; next = next->next) {
+	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -539,8 +449,10 @@
 
 		if (i > 0) {
 			offset += i;
+			err = -EINVAL;
 			if (end <= offset)
 				goto err;
+			err = -ENOMEM;
 			if (!pskb_pull(skb, i))
 				goto err;
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -548,6 +460,8 @@
 		}
 	}
 
+	err = -ENOMEM;
+
 	while (next && FRAG_CB(next)->offset < end) {
 		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
 
@@ -558,7 +472,7 @@
 			if (!pskb_pull(next, i))
 				goto err;
 			FRAG_CB(next)->offset += i;
-			qp->meat -= i;
+			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -573,9 +487,9 @@
 			if (prev)
 				prev->next = next;
 			else
-				qp->fragments = next;
+				qp->q.fragments = next;
 
-			qp->meat -= free_it->len;
+			qp->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -587,50 +501,77 @@
 	if (prev)
 		prev->next = skb;
 	else
-		qp->fragments = skb;
+		qp->q.fragments = skb;
 
-	if (skb->dev)
-		qp->iif = skb->dev->ifindex;
-	skb->dev = NULL;
-	qp->stamp = skb->tstamp;
-	qp->meat += skb->len;
-	atomic_add(skb->truesize, &ip_frag_mem);
+	dev = skb->dev;
+	if (dev) {
+		qp->iif = dev->ifindex;
+		skb->dev = NULL;
+	}
+	qp->q.stamp = skb->tstamp;
+	qp->q.meat += skb->len;
+	atomic_add(skb->truesize, &ip4_frags.mem);
 	if (offset == 0)
-		qp->last_in |= FIRST_IN;
+		qp->q.last_in |= FIRST_IN;
 
-	write_lock(&ipfrag_lock);
-	list_move_tail(&qp->lru_list, &ipq_lru_list);
-	write_unlock(&ipfrag_lock);
+	if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
+		return ip_frag_reasm(qp, prev, dev);
 
-	return;
+	write_lock(&ip4_frags.lock);
+	list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	write_unlock(&ip4_frags.lock);
+	return -EINPROGRESS;
 
 err:
 	kfree_skb(skb);
+	return err;
 }
 
 
 /* Build a new IP datagram from all its fragments. */
 
-static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+			 struct net_device *dev)
 {
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = qp->fragments;
+	struct sk_buff *fp, *head = qp->q.fragments;
 	int len;
 	int ihlen;
+	int err;
 
 	ipq_kill(qp);
 
+	/* Make the one we just received the head. */
+	if (prev) {
+		head = prev->next;
+		fp = skb_clone(head, GFP_ATOMIC);
+
+		if (!fp)
+			goto out_nomem;
+
+		fp->next = head->next;
+		prev->next = fp;
+
+		skb_morph(head, qp->q.fragments);
+		head->next = qp->q.fragments->next;
+
+		kfree_skb(qp->q.fragments);
+		qp->q.fragments = head;
+	}
+
 	BUG_TRAP(head != NULL);
 	BUG_TRAP(FRAG_CB(head)->offset == 0);
 
 	/* Allocate a new buffer for the datagram. */
 	ihlen = ip_hdrlen(head);
-	len = ihlen + qp->len;
+	len = ihlen + qp->q.len;
 
+	err = -E2BIG;
 	if (len > 65535)
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
+	err = -ENOMEM;
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 		goto out_nomem;
 
@@ -654,12 +595,12 @@
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip_frag_mem);
+		atomic_add(clone->truesize, &ip4_frags.mem);
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip_frag_mem);
+	atomic_sub(head->truesize, &ip4_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -669,19 +610,19 @@
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip_frag_mem);
+		atomic_sub(fp->truesize, &ip4_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = qp->stamp;
+	head->tstamp = qp->q.stamp;
 
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
-	qp->fragments = NULL;
-	return head;
+	qp->q.fragments = NULL;
+	return 0;
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
@@ -694,54 +635,46 @@
 			NIPQUAD(qp->saddr));
 out_fail:
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	return NULL;
+	return err;
 }
 
 /* Process an incoming IP datagram fragment. */
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct sk_buff *skb, u32 user)
 {
 	struct ipq *qp;
-	struct net_device *dev;
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
 		ip_evictor();
 
-	dev = skb->dev;
-
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
-		struct sk_buff *ret = NULL;
+		int ret;
 
-		spin_lock(&qp->lock);
+		spin_lock(&qp->q.lock);
 
-		ip_frag_queue(qp, skb);
+		ret = ip_frag_queue(qp, skb);
 
-		if (qp->last_in == (FIRST_IN|LAST_IN) &&
-		    qp->meat == qp->len)
-			ret = ip_frag_reasm(qp, dev);
-
-		spin_unlock(&qp->lock);
-		ipq_put(qp, NULL);
+		spin_unlock(&qp->q.lock);
+		ipq_put(qp);
 		return ret;
 	}
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
-	return NULL;
+	return -ENOMEM;
 }
 
 void __init ipfrag_init(void)
 {
-	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				 (jiffies ^ (jiffies >> 6)));
-
-	init_timer(&ipfrag_secret_timer);
-	ipfrag_secret_timer.function = ipfrag_secret_rebuild;
-	ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
-	add_timer(&ipfrag_secret_timer);
+	ip4_frags.ctl = &ip4_frags_ctl;
+	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.destructor = ip4_frag_free;
+	ip4_frags.skb_free = NULL;
+	ip4_frags.qsize = sizeof(struct ipq);
+	inet_frags_init(&ip4_frags);
 }
 
 EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 41d8964..168c871 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -172,8 +172,7 @@
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
-				if (skb == NULL) {
+				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
 					read_unlock(&ip_ra_lock);
 					return 1;
 				}
@@ -196,7 +195,7 @@
 	return 0;
 }
 
-static inline int ip_local_deliver_finish(struct sk_buff *skb)
+static int ip_local_deliver_finish(struct sk_buff *skb)
 {
 	__skb_pull(skb, ip_hdrlen(skb));
 
@@ -265,8 +264,7 @@
 	 */
 
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
-		if (!skb)
+		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
 			return 0;
 	}
 
@@ -326,7 +324,7 @@
 	return -1;
 }
 
-static inline int ip_rcv_finish(struct sk_buff *skb)
+static int ip_rcv_finish(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 699f067..f508835 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -202,7 +202,7 @@
 	       skb->dst->dev->mtu : dst_mtu(skb->dst);
 }
 
-static inline int ip_finish_output(struct sk_buff *skb)
+static int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 341474e..664cb8e 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -25,6 +25,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/netfilter.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
@@ -328,18 +329,18 @@
 	spin_unlock(&cp->lock);
 }
 
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 				  struct ip_vs_app *app)
 {
 	int diff;
-	const unsigned int tcp_offset = ip_hdrlen(*pskb);
+	const unsigned int tcp_offset = ip_hdrlen(skb);
 	struct tcphdr *th;
 	__u32 seq;
 
-	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -360,7 +361,7 @@
 	if (app->pkt_out == NULL)
 		return 1;
 
-	if (!app->pkt_out(app, cp, pskb, &diff))
+	if (!app->pkt_out(app, cp, skb, &diff))
 		return 0;
 
 	/*
@@ -378,7 +379,7 @@
  *	called by ipvs packet handler, assumes previously checked cp!=NULL
  *	returns false if it can't handle packet (oom)
  */
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_app *app;
 
@@ -391,7 +392,7 @@
 
 	/* TCP is complicated */
 	if (cp->protocol == IPPROTO_TCP)
-		return app_tcp_pkt_out(cp, pskb, app);
+		return app_tcp_pkt_out(cp, skb, app);
 
 	/*
 	 *	Call private output hook function
@@ -399,22 +400,22 @@
 	if (app->pkt_out == NULL)
 		return 1;
 
-	return app->pkt_out(app, cp, pskb, NULL);
+	return app->pkt_out(app, cp, skb, NULL);
 }
 
 
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 				 struct ip_vs_app *app)
 {
 	int diff;
-	const unsigned int tcp_offset = ip_hdrlen(*pskb);
+	const unsigned int tcp_offset = ip_hdrlen(skb);
 	struct tcphdr *th;
 	__u32 seq;
 
-	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -435,7 +436,7 @@
 	if (app->pkt_in == NULL)
 		return 1;
 
-	if (!app->pkt_in(app, cp, pskb, &diff))
+	if (!app->pkt_in(app, cp, skb, &diff))
 		return 0;
 
 	/*
@@ -453,7 +454,7 @@
  *	called by ipvs packet handler, assumes previously checked cp!=NULL.
  *	returns false if can't handle packet (oom).
  */
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_app *app;
 
@@ -466,7 +467,7 @@
 
 	/* TCP is complicated */
 	if (cp->protocol == IPPROTO_TCP)
-		return app_tcp_pkt_in(cp, pskb, app);
+		return app_tcp_pkt_in(cp, skb, app);
 
 	/*
 	 *	Call private input hook function
@@ -474,7 +475,7 @@
 	if (app->pkt_in == NULL)
 		return 1;
 
-	return app->pkt_in(app, cp, pskb, NULL);
+	return app->pkt_in(app, cp, skb, NULL);
 }
 
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index fbca2a2..c6ed765 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -58,7 +58,6 @@
 #ifdef CONFIG_IP_VS_DEBUG
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
-EXPORT_SYMBOL(ip_vs_make_skb_writable);
 
 
 /* ID used in ICMP lookups */
@@ -163,42 +162,6 @@
 }
 
 
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
-{
-	struct sk_buff *skb = *pskb;
-
-	/* skb is already used, better copy skb and its payload */
-	if (unlikely(skb_shared(skb) || skb->sk))
-		goto copy_skb;
-
-	/* skb data is already used, copy it */
-	if (unlikely(skb_cloned(skb)))
-		goto copy_data;
-
-	return pskb_may_pull(skb, writable_len);
-
-  copy_data:
-	if (unlikely(writable_len > skb->len))
-		return 0;
-	return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-
-  copy_skb:
-	if (unlikely(writable_len > skb->len))
-		return 0;
-	skb = skb_copy(skb, GFP_ATOMIC);
-	if (!skb)
-		return 0;
-	BUG_ON(skb_is_nonlinear(skb));
-
-	/* Rest of kernel will get very unhappy if we pass it a
-	   suddenly-orphaned skbuff */
-	if ((*pskb)->sk)
-		skb_set_owner_w(skb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = skb;
-	return 1;
-}
-
 /*
  *  IPVS persistent scheduling function
  *  It creates a connection entry according to its template if exists,
@@ -525,12 +488,12 @@
  *      for VS/NAT.
  */
 static unsigned int ip_vs_post_routing(unsigned int hooknum,
-				       struct sk_buff **pskb,
+				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
 {
-	if (!((*pskb)->ipvs_property))
+	if (!skb->ipvs_property)
 		return NF_ACCEPT;
 	/* The packet was sent from IPVS, exit this chain */
 	return NF_STOP;
@@ -541,13 +504,14 @@
 	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
 }
 
-static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-	skb = ip_defrag(skb, user);
-	if (skb)
+	int err = ip_defrag(skb, user);
+
+	if (!err)
 		ip_send_check(ip_hdr(skb));
-	return skb;
+
+	return err;
 }
 
 /*
@@ -605,9 +569,8 @@
  *	Currently handles error types - unreachable, quench, ttl exceeded.
  *	(Only used in VS/NAT)
  */
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
-	struct sk_buff *skb = *pskb;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
@@ -619,10 +582,8 @@
 
 	/* reassemble IP fragments */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
-		if (!skb)
+		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
 			return NF_STOLEN;
-		*pskb = skb;
 	}
 
 	iph = ip_hdr(skb);
@@ -690,9 +651,8 @@
 
 	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
 		offset += 2 * sizeof(__u16);
-	if (!ip_vs_make_skb_writable(pskb, offset))
+	if (!skb_make_writable(skb, offset))
 		goto out;
-	skb = *pskb;
 
 	ip_vs_nat_icmp(skb, pp, cp, 1);
 
@@ -724,11 +684,10 @@
  *      rewrite addresses of the packet and send it on its way...
  */
 static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	  const struct net_device *in, const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff  *skb = *pskb;
 	struct iphdr	*iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
@@ -741,11 +700,10 @@
 
 	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_out_icmp(pskb, &related);
+		int related, verdict = ip_vs_out_icmp(skb, &related);
 
 		if (related)
 			return verdict;
-		skb = *pskb;
 		iph = ip_hdr(skb);
 	}
 
@@ -756,11 +714,9 @@
 	/* reassemble IP fragments */
 	if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
 		     !pp->dont_defrag)) {
-		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
-		if (!skb)
+		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
 			return NF_STOLEN;
 		iph = ip_hdr(skb);
-		*pskb = skb;
 	}
 
 	ihl = iph->ihl << 2;
@@ -802,13 +758,12 @@
 
 	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
 
-	if (!ip_vs_make_skb_writable(pskb, ihl))
+	if (!skb_make_writable(skb, ihl))
 		goto drop;
 
 	/* mangle the packet */
-	if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
 		goto drop;
-	skb = *pskb;
 	ip_hdr(skb)->saddr = cp->vaddr;
 	ip_send_check(ip_hdr(skb));
 
@@ -818,9 +773,8 @@
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
-	if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
 		goto drop;
-	skb = *pskb;
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
@@ -835,7 +789,7 @@
 
   drop:
 	ip_vs_conn_put(cp);
-	kfree_skb(*pskb);
+	kfree_skb(skb);
 	return NF_STOLEN;
 }
 
@@ -847,9 +801,8 @@
  *	Currently handles error types - unreachable, quench, ttl exceeded.
  */
 static int
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
-	struct sk_buff *skb = *pskb;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
@@ -861,12 +814,9 @@
 
 	/* reassemble IP fragments */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		skb = ip_vs_gather_frags(skb,
-					 hooknum == NF_IP_LOCAL_IN ?
-					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
-		if (!skb)
+		if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ?
+					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
 			return NF_STOLEN;
-		*pskb = skb;
 	}
 
 	iph = ip_hdr(skb);
@@ -945,11 +895,10 @@
  *	and send it on its way...
  */
 static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 const struct net_device *in, const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff	*skb = *pskb;
 	struct iphdr	*iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
@@ -971,11 +920,10 @@
 
 	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
 
 		if (related)
 			return verdict;
-		skb = *pskb;
 		iph = ip_hdr(skb);
 	}
 
@@ -1056,16 +1004,16 @@
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
 	int r;
 
-	if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
+	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp(pskb, &r, hooknum);
+	return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
 
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 344ddbb..59aa166 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -30,6 +30,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/netfilter.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <asm/unaligned.h>
@@ -135,7 +136,7 @@
  * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
  */
 static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
-			 struct sk_buff **pskb, int *diff)
+			 struct sk_buff *skb, int *diff)
 {
 	struct iphdr *iph;
 	struct tcphdr *th;
@@ -155,14 +156,14 @@
 		return 1;
 
 	/* Linear packets are much easier to deal with. */
-	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	if (cp->app_data == &ip_vs_ftp_pasv) {
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 		data = (char *)th + (th->doff << 2);
-		data_limit = skb_tail_pointer(*pskb);
+		data_limit = skb_tail_pointer(skb);
 
 		if (ip_vs_ftp_get_addrport(data, data_limit,
 					   SERVER_STRING,
@@ -213,7 +214,7 @@
 			memcpy(start, buf, buf_len);
 			ret = 1;
 		} else {
-			ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
+			ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
 					  end-start, buf, buf_len);
 		}
 
@@ -238,7 +239,7 @@
  * the client.
  */
 static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
-			struct sk_buff **pskb, int *diff)
+			struct sk_buff *skb, int *diff)
 {
 	struct iphdr *iph;
 	struct tcphdr *th;
@@ -256,20 +257,20 @@
 		return 1;
 
 	/* Linear packets are much easier to deal with. */
-	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	/*
 	 * Detecting whether it is passive
 	 */
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 
 	/* Since there may be OPTIONS in the TCP packet and the HLEN is
 	   the length of the header in 32-bit multiples, it is accurate
 	   to calculate data address by th+HLEN*4 */
 	data = data_start = (char *)th + (th->doff << 2);
-	data_limit = skb_tail_pointer(*pskb);
+	data_limit = skb_tail_pointer(skb);
 
 	while (data <= data_limit - 6) {
 		if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index e65577a..12dc0d6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -20,6 +20,7 @@
 #include <linux/tcp.h>                  /* for tcphdr */
 #include <net/ip.h>
 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
 #include <net/ip_vs.h>
@@ -122,27 +123,27 @@
 
 
 static int
-tcp_snat_handler(struct sk_buff **pskb,
+tcp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	const unsigned int tcphoff = ip_hdrlen(*pskb);
+	const unsigned int tcphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
-		if (!ip_vs_app_pkt_out(cp, pskb))
+		if (!ip_vs_app_pkt_out(cp, skb))
 			return 0;
 	}
 
-	tcph = (void *)ip_hdr(*pskb) + tcphoff;
+	tcph = (void *)ip_hdr(skb) + tcphoff;
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
@@ -150,17 +151,15 @@
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		tcph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
-					     (*pskb)->len - tcphoff, 0);
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
-						(*pskb)->len - tcphoff,
-						cp->protocol,
-						(*pskb)->csum);
+						skb->len - tcphoff,
+						cp->protocol, skb->csum);
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 			  pp->name, tcph->check,
 			  (char*)&(tcph->check) - (char*)tcph);
@@ -170,30 +169,30 @@
 
 
 static int
-tcp_dnat_handler(struct sk_buff **pskb,
+tcp_dnat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	const unsigned int tcphoff = ip_hdrlen(*pskb);
+	const unsigned int tcphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/*
 		 *	Attempt ip_vs_app call.
 		 *	It will fix ip_vs_conn and iph ack_seq stuff
 		 */
-		if (!ip_vs_app_pkt_in(cp, pskb))
+		if (!ip_vs_app_pkt_in(cp, skb))
 			return 0;
 	}
 
-	tcph = (void *)ip_hdr(*pskb) + tcphoff;
+	tcph = (void *)ip_hdr(skb) + tcphoff;
 	tcph->dest = cp->dport;
 
 	/*
@@ -203,18 +202,16 @@
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		tcph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
-					     (*pskb)->len - tcphoff, 0);
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
-						(*pskb)->len - tcphoff,
-						cp->protocol,
-						(*pskb)->csum);
-		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+						skb->len - tcphoff,
+						cp->protocol, skb->csum);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	return 1;
 }
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ee5fe6..1fa7b33 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -18,6 +18,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/kernel.h>
+#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/udp.h>
 
@@ -129,29 +130,29 @@
 }
 
 static int
-udp_snat_handler(struct sk_buff **pskb,
+udp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	const unsigned int udphoff = ip_hdrlen(*pskb);
+	const unsigned int udphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/*
 		 *	Call application helper if needed
 		 */
-		if (!ip_vs_app_pkt_out(cp, pskb))
+		if (!ip_vs_app_pkt_out(cp, skb))
 			return 0;
 	}
 
-	udph = (void *)ip_hdr(*pskb) + udphoff;
+	udph = (void *)ip_hdr(skb) + udphoff;
 	udph->source = cp->vport;
 
 	/*
@@ -161,17 +162,15 @@
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		udph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, udphoff,
-					     (*pskb)->len - udphoff, 0);
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 		udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
-						(*pskb)->len - udphoff,
-						cp->protocol,
-						(*pskb)->csum);
+						skb->len - udphoff,
+						cp->protocol, skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -183,30 +182,30 @@
 
 
 static int
-udp_dnat_handler(struct sk_buff **pskb,
+udp_dnat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = ip_hdrlen(*pskb);
+	unsigned int udphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/*
 		 *	Attempt ip_vs_app call.
 		 *	It will fix ip_vs_conn
 		 */
-		if (!ip_vs_app_pkt_in(cp, pskb))
+		if (!ip_vs_app_pkt_in(cp, skb))
 			return 0;
 	}
 
-	udph = (void *)ip_hdr(*pskb) + udphoff;
+	udph = (void *)ip_hdr(skb) + udphoff;
 	udph->dest = cp->dport;
 
 	/*
@@ -216,20 +215,18 @@
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		udph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, udphoff,
-					     (*pskb)->len - udphoff, 0);
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 		udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
-						(*pskb)->len - udphoff,
-						cp->protocol,
-						(*pskb)->csum);
+						skb->len - udphoff,
+						cp->protocol, skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
-		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	return 1;
 }
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 666e080..d0a92de 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -253,7 +253,7 @@
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
 		goto tx_error_put;
 
 	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
@@ -264,7 +264,7 @@
 	skb->dst = &rt->u.dst;
 
 	/* mangle the packet */
-	if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 		goto tx_error;
 	ip_hdr(skb)->daddr = cp->daddr;
 	ip_send_check(ip_hdr(skb));
@@ -529,7 +529,7 @@
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!ip_vs_make_skb_writable(&skb, offset))
+	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
 	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b441929..5539deb 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -3,14 +3,15 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/ip.h>
+#include <linux/skbuff.h>
 #include <net/route.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 {
-	const struct iphdr *iph = ip_hdr(*pskb);
+	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi fl = {};
 	struct dst_entry *odst;
@@ -29,14 +30,14 @@
 		if (type == RTN_LOCAL)
 			fl.nl_u.ip4_u.saddr = iph->saddr;
 		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-		fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-		fl.mark = (*pskb)->mark;
+		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+		fl.mark = skb->mark;
 		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
 		/* Drop old route. */
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = &rt->u.dst;
+		dst_release(skb->dst);
+		skb->dst = &rt->u.dst;
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
@@ -44,8 +45,8 @@
 		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
-		odst = (*pskb)->dst;
-		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+		odst = skb->dst;
+		if (ip_route_input(skb, iph->daddr, iph->saddr,
 				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
 			dst_release(&rt->u.dst);
 			return -1;
@@ -54,70 +55,54 @@
 		dst_release(odst);
 	}
 
-	if ((*pskb)->dst->error)
+	if (skb->dst->error)
 		return -1;
 
 #ifdef CONFIG_XFRM
-	if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
-		if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(skb, &fl, AF_INET) == 0)
+		if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
 			return -1;
 #endif
 
 	/* Change in oif may mean change in hh_len. */
-	hh_len = (*pskb)->dst->dev->hard_header_len;
-	if (skb_headroom(*pskb) < hh_len) {
-		struct sk_buff *nskb;
-
-		nskb = skb_realloc_headroom(*pskb, hh_len);
-		if (!nskb)
-			return -1;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
+	hh_len = skb->dst->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
 
 	return 0;
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
 #ifdef CONFIG_XFRM
-int ip_xfrm_me_harder(struct sk_buff **pskb)
+int ip_xfrm_me_harder(struct sk_buff *skb)
 {
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
 
-	if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
 		return 0;
-	if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+	if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
 		return -1;
 
-	dst = (*pskb)->dst;
+	dst = skb->dst;
 	if (dst->xfrm)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+	if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0)
 		return -1;
 
-	dst_release((*pskb)->dst);
-	(*pskb)->dst = dst;
+	dst_release(skb->dst);
+	skb->dst = dst;
 
 	/* Change in oif may mean change in hh_len. */
-	hh_len = (*pskb)->dst->dev->hard_header_len;
-	if (skb_headroom(*pskb) < hh_len) {
-		struct sk_buff *nskb;
-
-		nskb = skb_realloc_headroom(*pskb, hh_len);
-		if (!nskb)
-			return -1;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
+	hh_len = skb->dst->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
 	return 0;
 }
 EXPORT_SYMBOL(ip_xfrm_me_harder);
@@ -150,17 +135,17 @@
 	}
 }
 
-static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info)
 {
 	const struct ip_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP_LOCAL_OUT) {
-		const struct iphdr *iph = ip_hdr(*pskb);
+		const struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->tos == rt_info->tos
 		      && iph->daddr == rt_info->daddr
 		      && iph->saddr == rt_info->saddr))
-			return ip_route_me_harder(pskb, RTN_UNSPEC);
+			return ip_route_me_harder(skb, RTN_UNSPEC);
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 29114a9..2909c92 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -197,7 +197,7 @@
 	return 1;
 }
 
-static unsigned int arpt_error(struct sk_buff **pskb,
+static unsigned int arpt_error(struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
 			       unsigned int hooknum,
@@ -215,7 +215,7 @@
 	return (struct arpt_entry *)(base + offset);
 }
 
-unsigned int arpt_do_table(struct sk_buff **pskb,
+unsigned int arpt_do_table(struct sk_buff *skb,
 			   unsigned int hook,
 			   const struct net_device *in,
 			   const struct net_device *out,
@@ -231,9 +231,9 @@
 	struct xt_table_info *private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
-	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
-				     (2 * (*pskb)->dev->addr_len) +
-				     (2 * sizeof(u32)))))
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * skb->dev->addr_len) +
+				 (2 * sizeof(u32)))))
 		return NF_DROP;
 
 	indev = in ? in->name : nulldevname;
@@ -245,14 +245,14 @@
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	arp = arp_hdr(*pskb);
+	arp = arp_hdr(skb);
 	do {
-		if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
+		if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
 			struct arpt_entry_target *t;
 			int hdr_len;
 
 			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
-				(2 * (*pskb)->dev->addr_len);
+				(2 * skb->dev->addr_len);
 			ADD_COUNTER(e->counters, hdr_len, 1);
 
 			t = arpt_get_target(e);
@@ -290,14 +290,14 @@
 				/* Targets which reenter must return
 				 * abs. verdicts
 				 */
-				verdict = t->u.kernel.target->target(pskb,
+				verdict = t->u.kernel.target->target(skb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
 								     t->data);
 
 				/* Target might have changed stuff. */
-				arp = arp_hdr(*pskb);
+				arp = arp_hdr(skb);
 
 				if (verdict == ARPT_CONTINUE)
 					e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index c4bdab4..45fa4e2 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -1,5 +1,6 @@
 /* module that allows mangling of the arp payload */
 #include <linux/module.h>
+#include <linux/netfilter.h>
 #include <linux/netfilter_arp/arpt_mangle.h>
 #include <net/sock.h>
 
@@ -8,7 +9,7 @@
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct xt_target *target,
        const void *targinfo)
@@ -18,47 +19,38 @@
 	unsigned char *arpptr;
 	int pln, hln;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (skb_make_writable(skb, skb->len))
+		return NF_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-
-	arp = arp_hdr(*pskb);
-	arpptr = skb_network_header(*pskb) + sizeof(*arp);
+	arp = arp_hdr(skb);
+	arpptr = skb_network_header(skb) + sizeof(*arp);
 	pln = arp->ar_pln;
 	hln = arp->ar_hln;
 	/* We assume that pln and hln were checked in the match */
 	if (mangle->flags & ARPT_MANGLE_SDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > skb_tail_pointer(*pskb)))
+		   (arpptr + hln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->src_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_SIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > skb_tail_pointer(*pskb)))
+		   (arpptr + pln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_s.src_ip, pln);
 	}
 	arpptr += pln;
 	if (mangle->flags & ARPT_MANGLE_TDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > skb_tail_pointer(*pskb)))
+		   (arpptr + hln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->tgt_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_TIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > skb_tail_pointer(*pskb)))
+		   (arpptr + pln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
 	}
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 75c0230..302d3da 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -56,12 +56,12 @@
 
 /* The work comes in here from netfilter.c */
 static unsigned int arpt_hook(unsigned int hook,
-			      struct sk_buff **pskb,
+			      struct sk_buff *skb,
 			      const struct net_device *in,
 			      const struct net_device *out,
 			      int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(pskb, hook, in, out, &packet_filter);
+	return arpt_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 23cbfc7..10a2ce0 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -335,6 +335,7 @@
 ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 {
 	int diff;
+	int err;
 	struct iphdr *user_iph = (struct iphdr *)v->payload;
 
 	if (v->data_len < sizeof(*user_iph))
@@ -347,25 +348,18 @@
 		if (v->data_len > 0xFFFF)
 			return -EINVAL;
 		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-
-			newskb = skb_copy_expand(e->skb,
-						 skb_headroom(e->skb),
-						 diff,
-						 GFP_ATOMIC);
-			if (newskb == NULL) {
-				printk(KERN_WARNING "ip_queue: OOM "
-				      "in mangle, dropping packet\n");
-				return -ENOMEM;
+			err = pskb_expand_head(e->skb, 0,
+					       diff - skb_tailroom(e->skb),
+					       GFP_ATOMIC);
+			if (err) {
+				printk(KERN_WARNING "ip_queue: error "
+				      "in mangle, dropping packet: %d\n", -err);
+				return err;
 			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(&e->skb, v->data_len))
+	if (!skb_make_writable(e->skb, v->data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6486894..4b10b98 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -169,7 +169,7 @@
 }
 
 static unsigned int
-ipt_error(struct sk_buff **pskb,
+ipt_error(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -312,7 +312,7 @@
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ipt_do_table(struct sk_buff **pskb,
+ipt_do_table(struct sk_buff *skb,
 	     unsigned int hook,
 	     const struct net_device *in,
 	     const struct net_device *out,
@@ -331,8 +331,8 @@
 	struct xt_table_info *private;
 
 	/* Initialization */
-	ip = ip_hdr(*pskb);
-	datalen = (*pskb)->len - ip->ihl * 4;
+	ip = ip_hdr(skb);
+	datalen = skb->len - ip->ihl * 4;
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 	/* We handle fragments by dealing with the first fragment as
@@ -359,7 +359,7 @@
 			struct ipt_entry_target *t;
 
 			if (IPT_MATCH_ITERATE(e, do_match,
-					      *pskb, in, out,
+					      skb, in, out,
 					      offset, &hotdrop) != 0)
 				goto no_match;
 
@@ -371,8 +371,8 @@
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 			/* The packet is traced: log it */
-			if (unlikely((*pskb)->nf_trace))
-				trace_packet(*pskb, hook, in, out,
+			if (unlikely(skb->nf_trace))
+				trace_packet(skb, hook, in, out,
 					     table->name, private, e);
 #endif
 			/* Standard target? */
@@ -410,7 +410,7 @@
 				((struct ipt_entry *)table_base)->comefrom
 					= 0xeeeeeeec;
 #endif
-				verdict = t->u.kernel.target->target(pskb,
+				verdict = t->u.kernel.target->target(skb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
@@ -428,8 +428,8 @@
 					= 0x57acc001;
 #endif
 				/* Target might have changed stuff. */
-				ip = ip_hdr(*pskb);
-				datalen = (*pskb)->len - ip->ihl * 4;
+				ip = ip_hdr(skb);
+				datalen = skb->len - ip->ihl * 4;
 
 				if (verdict == IPT_CONTINUE)
 					e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 27f14e1..2f544da 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -289,7 +289,7 @@
  ***********************************************************************/
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -305,7 +305,7 @@
 	 * is only decremented by destroy() - and ip_tables guarantees
 	 * that the ->target() function isn't called after ->destroy() */
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (ct == NULL) {
 		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
 			/* FIXME: need to drop invalid ones, since replies
@@ -316,7 +316,7 @@
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
-	if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
+	if (ip_hdr(skb)->protocol == IPPROTO_ICMP
 	    && (ctinfo == IP_CT_RELATED
 		|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
 		return XT_CONTINUE;
@@ -325,7 +325,7 @@
 	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
 	 * on, which all have an ID field [relevant for hashing]. */
 
-	hash = clusterip_hashfn(*pskb, cipinfo->config);
+	hash = clusterip_hashfn(skb, cipinfo->config);
 
 	switch (ctinfo) {
 		case IP_CT_NEW:
@@ -355,7 +355,7 @@
 
 	/* despite being received via linklayer multicast, this is
 	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
-	(*pskb)->pkt_type = PACKET_HOST;
+	skb->pkt_type = PACKET_HOST;
 
 	return XT_CONTINUE;
 }
@@ -505,12 +505,12 @@
 
 static unsigned int
 arp_mangle(unsigned int hook,
-	   struct sk_buff **pskb,
+	   struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
-	struct arphdr *arp = arp_hdr(*pskb);
+	struct arphdr *arp = arp_hdr(skb);
 	struct arp_payload *payload;
 	struct clusterip_config *c;
 
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index f1253bd..add1100 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -26,15 +26,15 @@
 /* set ECT codepoint from IP header.
  * 	return false if there was an error. */
 static inline bool
-set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 {
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		__u8 oldtos;
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return false;
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -45,14 +45,13 @@
 
 /* Return false if there was an error. */
 static inline bool
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
 	__be16 oldval;
 
 	/* Not enought header? */
-	tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-				  sizeof(_tcph), &_tcph);
+	tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (!tcph)
 		return false;
 
@@ -62,9 +61,9 @@
 	     tcph->cwr == einfo->proto.tcp.cwr))
 		return true;
 
-	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
 		return false;
-	tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
+	tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
 
 	oldval = ((__be16 *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -72,13 +71,13 @@
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
 
-	nf_proto_csum_replace2(&tcph->check, *pskb,
+	nf_proto_csum_replace2(&tcph->check, skb,
 				oldval, ((__be16 *)tcph)[6], 0);
 	return true;
 }
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -88,12 +87,12 @@
 	const struct ipt_ECN_info *einfo = targinfo;
 
 	if (einfo->operation & IPT_ECN_OP_SET_IP)
-		if (!set_ect_ip(pskb, einfo))
+		if (!set_ect_ip(skb, einfo))
 			return NF_DROP;
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
-	    && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
-		if (!set_ect_tcp(pskb, einfo))
+	    && ip_hdr(skb)->protocol == IPPROTO_TCP)
+		if (!set_ect_tcp(skb, einfo))
 			return NF_DROP;
 
 	return XT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 127a5e8..4b5e821 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -418,7 +418,7 @@
 }
 
 static unsigned int
-ipt_log_target(struct sk_buff **pskb,
+ipt_log_target(struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
 	       unsigned int hooknum,
@@ -432,7 +432,7 @@
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+	ipt_log_packet(PF_INET, hooknum, skb, in, out, &li,
 		       loginfo->prefix);
 	return XT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 3e0b562..44b516e 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -52,7 +52,7 @@
 }
 
 static unsigned int
-masquerade_target(struct sk_buff **pskb,
+masquerade_target(struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  unsigned int hooknum,
@@ -69,7 +69,7 @@
 
 	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	nat = nfct_nat(ct);
 
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
@@ -82,7 +82,7 @@
 		return NF_ACCEPT;
 
 	mr = targinfo;
-	rt = (struct rtable *)(*pskb)->dst;
+	rt = (struct rtable *)skb->dst;
 	newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
 	if (!newsrc) {
 		printk("MASQUERADE: %s ate my IP address\n", out->name);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 41a011d..f869929 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -43,7 +43,7 @@
 }
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -59,14 +59,14 @@
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_POST_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
 	if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
-		new_ip = ip_hdr(*pskb)->daddr & ~netmask;
+		new_ip = ip_hdr(skb)->daddr & ~netmask;
 	else
-		new_ip = ip_hdr(*pskb)->saddr & ~netmask;
+		new_ip = ip_hdr(skb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
 	newrange = ((struct nf_nat_range)
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 6ac7a23..f7cf7d6 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -47,7 +47,7 @@
 }
 
 static unsigned int
-redirect_target(struct sk_buff **pskb,
+redirect_target(struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		unsigned int hooknum,
@@ -63,7 +63,7 @@
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	/* Local packets: make them go to loopback */
@@ -76,7 +76,7 @@
 		newdst = 0;
 
 		rcu_read_lock();
-		indev = __in_dev_get_rcu((*pskb)->dev);
+		indev = __in_dev_get_rcu(skb->dev);
 		if (indev && (ifa = indev->ifa_list))
 			newdst = ifa->ifa_local;
 		rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index cb038c8..dcf4d21 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -131,7 +131,7 @@
 	   )
 		addr_type = RTN_LOCAL;
 
-	if (ip_route_me_harder(&nskb, addr_type))
+	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
 	nskb->ip_summed = CHECKSUM_NONE;
@@ -162,7 +162,7 @@
 	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 }
 
-static unsigned int reject(struct sk_buff **pskb,
+static unsigned int reject(struct sk_buff *skb,
 			   const struct net_device *in,
 			   const struct net_device *out,
 			   unsigned int hooknum,
@@ -173,7 +173,7 @@
 
 	/* Our naive response construction doesn't deal with IP
 	   options, and probably shouldn't try. */
-	if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
+	if (ip_hdrlen(skb) != sizeof(struct iphdr))
 		return NF_DROP;
 
 	/* WARNING: This code causes reentry within iptables.
@@ -181,28 +181,28 @@
 	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
-		send_unreach(*pskb, ICMP_NET_UNREACH);
+		send_unreach(skb, ICMP_NET_UNREACH);
 		break;
 	case IPT_ICMP_HOST_UNREACHABLE:
-		send_unreach(*pskb, ICMP_HOST_UNREACH);
+		send_unreach(skb, ICMP_HOST_UNREACH);
 		break;
 	case IPT_ICMP_PROT_UNREACHABLE:
-		send_unreach(*pskb, ICMP_PROT_UNREACH);
+		send_unreach(skb, ICMP_PROT_UNREACH);
 		break;
 	case IPT_ICMP_PORT_UNREACHABLE:
-		send_unreach(*pskb, ICMP_PORT_UNREACH);
+		send_unreach(skb, ICMP_PORT_UNREACH);
 		break;
 	case IPT_ICMP_NET_PROHIBITED:
-		send_unreach(*pskb, ICMP_NET_ANO);
+		send_unreach(skb, ICMP_NET_ANO);
 		break;
 	case IPT_ICMP_HOST_PROHIBITED:
-		send_unreach(*pskb, ICMP_HOST_ANO);
+		send_unreach(skb, ICMP_HOST_ANO);
 		break;
 	case IPT_ICMP_ADMIN_PROHIBITED:
-		send_unreach(*pskb, ICMP_PKT_FILTERED);
+		send_unreach(skb, ICMP_PKT_FILTERED);
 		break;
 	case IPT_TCP_RESET:
-		send_reset(*pskb, hooknum);
+		send_reset(skb, hooknum);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 97641f1..8988571 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -104,7 +104,7 @@
 }
 
 static unsigned int
-same_target(struct sk_buff **pskb,
+same_target(struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		unsigned int hooknum,
@@ -121,7 +121,7 @@
 
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 			hooknum == NF_IP_POST_ROUTING);
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 25f5d0b..d4573ba 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -21,7 +21,7 @@
 MODULE_DESCRIPTION("iptables TOS mangling module");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -29,13 +29,13 @@
        const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 
 	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		__u8 oldtos;
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		oldtos = iph->tos;
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
 		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 2b54e7b..c620a05 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -20,7 +20,7 @@
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ipt_ttl_target(struct sk_buff **pskb,
+ipt_ttl_target(struct sk_buff *skb,
 	       const struct net_device *in, const struct net_device *out,
 	       unsigned int hooknum, const struct xt_target *target,
 	       const void *targinfo)
@@ -29,10 +29,10 @@
 	const struct ipt_TTL_info *info = targinfo;
 	int new_ttl;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return NF_DROP;
 
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 
 	switch (info->mode) {
 		case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index c636d6d..212b830 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -279,7 +279,7 @@
 	spin_unlock_bh(&ulog_lock);
 }
 
-static unsigned int ipt_ulog_target(struct sk_buff **pskb,
+static unsigned int ipt_ulog_target(struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
@@ -288,7 +288,7 @@
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
-	ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
+	ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL);
 
 	return XT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 4f51c1d..ba3262c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -62,31 +62,31 @@
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter);
+	return ipt_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
 ipt_local_out_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("iptable_filter: ignoring short SOCK_RAW "
 			       "packet.\n");
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter);
+	return ipt_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 902446f..b4360a6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -75,17 +75,17 @@
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_route_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler);
+	return ipt_do_table(skb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
 ipt_local_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
@@ -97,8 +97,8 @@
 	u_int32_t mark;
 
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("iptable_mangle: ignoring short SOCK_RAW "
 			       "packet.\n");
@@ -106,22 +106,22 @@
 	}
 
 	/* Save things which could affect route */
-	mark = (*pskb)->mark;
-	iph = ip_hdr(*pskb);
+	mark = skb->mark;
+	iph = ip_hdr(skb);
 	saddr = iph->saddr;
 	daddr = iph->daddr;
 	tos = iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
+	ret = ipt_do_table(skb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 
 		if (iph->saddr != saddr ||
 		    iph->daddr != daddr ||
-		    (*pskb)->mark != mark ||
+		    skb->mark != mark ||
 		    iph->tos != tos)
-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
 				ret = NF_DROP;
 	}
 
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index d6e5033..5de6e57 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -47,30 +47,30 @@
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_raw);
+	return ipt_do_table(skb, hook, in, out, &packet_raw);
 }
 
 static unsigned int
 ipt_local_hook(unsigned int hook,
-	       struct sk_buff **pskb,
+	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
 	       int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("iptable_raw: ignoring short SOCK_RAW"
 			       "packet.\n");
 		return NF_ACCEPT;
 	}
-	return ipt_do_table(pskb, hook, in, out, &packet_raw);
+	return ipt_do_table(skb, hook, in, out, &packet_raw);
 }
 
 /* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2fcb924..831e9b2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,19 +63,20 @@
 }
 
 /* Returns new sk_buff, or NULL */
-static struct sk_buff *
-nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
+	int err;
+
 	skb_orphan(skb);
 
 	local_bh_disable();
-	skb = ip_defrag(skb, user);
+	err = ip_defrag(skb, user);
 	local_bh_enable();
 
-	if (skb)
+	if (!err)
 		ip_send_check(ip_hdr(skb));
 
-	return skb;
+	return err;
 }
 
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
@@ -99,17 +100,17 @@
 }
 
 static unsigned int ipv4_confirm(unsigned int hooknum,
-				 struct sk_buff **pskb,
+				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
 				 int (*okfn)(struct sk_buff *))
 {
 	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(pskb);
+	return nf_conntrack_confirm(skb);
 }
 
 static unsigned int ipv4_conntrack_help(unsigned int hooknum,
-				      struct sk_buff **pskb,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
@@ -120,7 +121,7 @@
 	struct nf_conntrack_helper *helper;
 
 	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
 		return NF_ACCEPT;
 
@@ -131,56 +132,55 @@
 	helper = rcu_dereference(help->helper);
 	if (!helper)
 		return NF_ACCEPT;
-	return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
 			    ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
-					  struct sk_buff **pskb,
+					  struct sk_buff *skb,
 					  const struct net_device *in,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
 	/* Previously seen (loopback)?  Ignore.  Do this before
 	   fragment check. */
-	if ((*pskb)->nfct)
+	if (skb->nfct)
 		return NF_ACCEPT;
 
 	/* Gather fragments. */
-	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		*pskb = nf_ct_ipv4_gather_frags(*pskb,
-						hooknum == NF_IP_PRE_ROUTING ?
-						IP_DEFRAG_CONNTRACK_IN :
-						IP_DEFRAG_CONNTRACK_OUT);
-		if (!*pskb)
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (nf_ct_ipv4_gather_frags(skb,
+					    hooknum == NF_IP_PRE_ROUTING ?
+					    IP_DEFRAG_CONNTRACK_IN :
+					    IP_DEFRAG_CONNTRACK_OUT))
 			return NF_STOLEN;
 	}
 	return NF_ACCEPT;
 }
 
 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
-				      struct sk_buff **pskb,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return nf_conntrack_in(PF_INET, hooknum, pskb);
+	return nf_conntrack_in(PF_INET, hooknum, skb);
 }
 
 static unsigned int ipv4_conntrack_local(unsigned int hooknum,
-					 struct sk_buff **pskb,
+					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
 					 int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
 	}
-	return nf_conntrack_in(PF_INET, hooknum, pskb);
+	return nf_conntrack_in(PF_INET, hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index bd93a1d..35a5aa6 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -24,7 +24,7 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_nat_amanda");
 
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int matchoff,
 			 unsigned int matchlen,
@@ -53,7 +53,7 @@
 		return NF_DROP;
 
 	sprintf(buffer, "%u", port);
-	ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+	ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
 				       matchoff, matchlen,
 				       buffer, strlen(buffer));
 	if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 7221aa2..56e93f6 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -349,7 +349,7 @@
 /* Returns true if succeeded. */
 static int
 manip_pkt(u_int16_t proto,
-	  struct sk_buff **pskb,
+	  struct sk_buff *skb,
 	  unsigned int iphdroff,
 	  const struct nf_conntrack_tuple *target,
 	  enum nf_nat_manip_type maniptype)
@@ -357,19 +357,19 @@
 	struct iphdr *iph;
 	struct nf_nat_protocol *p;
 
-	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
+	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
 		return 0;
 
-	iph = (void *)(*pskb)->data + iphdroff;
+	iph = (void *)skb->data + iphdroff;
 
 	/* Manipulate protcol part. */
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	p = __nf_nat_proto_find(proto);
-	if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
+	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
 		return 0;
 
-	iph = (void *)(*pskb)->data + iphdroff;
+	iph = (void *)skb->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
@@ -385,7 +385,7 @@
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned int hooknum,
-			   struct sk_buff **pskb)
+			   struct sk_buff *skb)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
@@ -407,7 +407,7 @@
 		/* We are aiming to look like inverse of other direction. */
 		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
 
-		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+		if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
 			return NF_DROP;
 	}
 	return NF_ACCEPT;
@@ -418,7 +418,7 @@
 int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned int hooknum,
-				  struct sk_buff **pskb)
+				  struct sk_buff *skb)
 {
 	struct {
 		struct icmphdr icmp;
@@ -426,24 +426,24 @@
 	} *inside;
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple inner, target;
-	int hdrlen = ip_hdrlen(*pskb);
+	int hdrlen = ip_hdrlen(skb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
 
-	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
+	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;
 
-	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+	inside = (void *)skb->data + ip_hdrlen(skb);
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
 		return 0;
 
 	/* Must be RELATED */
-	NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
-		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+	NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
+		     skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
 
 	/* Redirects on non-null nats must be dropped, else they'll
 	   start talking to each other without our translation, and be
@@ -458,15 +458,15 @@
 	}
 
 	pr_debug("icmp_reply_translation: translating error %p manip %u "
-		 "dir %s\n", *pskb, manip,
+		 "dir %s\n", skb, manip,
 		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
-	if (!nf_ct_get_tuple(*pskb,
-			     ip_hdrlen(*pskb) + sizeof(struct icmphdr),
-			     (ip_hdrlen(*pskb) +
+	if (!nf_ct_get_tuple(skb,
+			     ip_hdrlen(skb) + sizeof(struct icmphdr),
+			     (ip_hdrlen(skb) +
 			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
 			     (u_int16_t)AF_INET,
 			     inside->ip.protocol,
@@ -478,19 +478,19 @@
 	   pass all hooks (locally-generated ICMP).  Consider incoming
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, pskb,
-		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
+	if (!manip_pkt(inside->ip.protocol, skb,
+		       ip_hdrlen(skb) + sizeof(inside->icmp),
 		       &ct->tuplehash[!dir].tuple,
 		       !manip))
 		return 0;
 
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+		inside = (void *)skb->data + ip_hdrlen(skb);
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum =
-			csum_fold(skb_checksum(*pskb, hdrlen,
-					       (*pskb)->len - hdrlen, 0));
+			csum_fold(skb_checksum(skb, hdrlen,
+					       skb->len - hdrlen, 0));
 	}
 
 	/* Change outer to look the reply to an incoming packet
@@ -506,7 +506,7 @@
 
 	if (ct->status & statusbit) {
 		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-		if (!manip_pkt(0, pskb, 0, &target, manip))
+		if (!manip_pkt(0, skb, 0, &target, manip))
 			return 0;
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 3663bd8..e1a16d3 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -28,7 +28,7 @@
 /* FIXME: Time out? --RR */
 
 static int
-mangle_rfc959_packet(struct sk_buff **pskb,
+mangle_rfc959_packet(struct sk_buff *skb,
 		     __be32 newip,
 		     u_int16_t port,
 		     unsigned int matchoff,
@@ -43,13 +43,13 @@
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
 }
 
 /* |1|132.235.1.2|6275| */
 static int
-mangle_eprt_packet(struct sk_buff **pskb,
+mangle_eprt_packet(struct sk_buff *skb,
 		   __be32 newip,
 		   u_int16_t port,
 		   unsigned int matchoff,
@@ -63,13 +63,13 @@
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
 }
 
 /* |1|132.235.1.2|6275| */
 static int
-mangle_epsv_packet(struct sk_buff **pskb,
+mangle_epsv_packet(struct sk_buff *skb,
 		   __be32 newip,
 		   u_int16_t port,
 		   unsigned int matchoff,
@@ -83,11 +83,11 @@
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
 }
 
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
+static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
 		       unsigned int, unsigned int, struct nf_conn *,
 		       enum ip_conntrack_info)
 = {
@@ -99,7 +99,7 @@
 
 /* So, this packet has hit the connection tracking matching code.
    Mangle it, and change the expectation to match the new version. */
-static unsigned int nf_nat_ftp(struct sk_buff **pskb,
+static unsigned int nf_nat_ftp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       enum nf_ct_ftp_type type,
 			       unsigned int matchoff,
@@ -132,7 +132,7 @@
 	if (port == 0)
 		return NF_DROP;
 
-	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
+	if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) {
 		nf_ct_unexpect_related(exp);
 		return NF_DROP;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c1b059a..a868c8c 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -22,12 +22,12 @@
 #include <linux/netfilter/nf_conntrack_h323.h>
 
 /****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
+static int set_addr(struct sk_buff *skb,
 		    unsigned char **data, int dataoff,
 		    unsigned int addroff, __be32 ip, __be16 port)
 {
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	struct {
 		__be32 ip;
 		__be16 port;
@@ -38,8 +38,8 @@
 	buf.port = port;
 	addroff += dataoff;
 
-	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
-		if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+		if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
@@ -49,14 +49,13 @@
 		}
 
 		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+		th = skb_header_pointer(skb, ip_hdrlen(skb),
 					sizeof(_tcph), &_tcph);
 		if (th == NULL)
 			return -1;
-		*data = (*pskb)->data + ip_hdrlen(*pskb) +
-		    th->doff * 4 + dataoff;
+		*data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
 	} else {
-		if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
@@ -67,36 +66,35 @@
 		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
-		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
-			 sizeof(struct udphdr));
+		*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
 	}
 
 	return 0;
 }
 
 /****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
+static int set_h225_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 TransportAddress *taddr,
 			 union nf_conntrack_address *addr, __be16 port)
 {
-	return set_addr(pskb, data, dataoff, taddr->ipAddress.ip,
+	return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
 			addr->ip, port);
 }
 
 /****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
+static int set_h245_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 H245_TransportAddress *taddr,
 			 union nf_conntrack_address *addr, __be16 port)
 {
-	return set_addr(pskb, data, dataoff,
+	return set_addr(skb, data, dataoff,
 			taddr->unicastAddress.iPAddress.network,
 			addr->ip, port);
 }
 
 /****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data,
 			TransportAddress *taddr, int count)
@@ -125,7 +123,7 @@
 					 NIPQUAD(addr.ip), port,
 					 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
 					 info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &taddr[i],
+				return set_h225_addr(skb, data, 0, &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.dst.u3,
 						     info->sig_port[!dir]);
@@ -137,7 +135,7 @@
 					 NIPQUAD(addr.ip), port,
 					 NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
 					 info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &taddr[i],
+				return set_h225_addr(skb, data, 0, &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.src.u3,
 						     info->sig_port[!dir]);
@@ -149,7 +147,7 @@
 }
 
 /****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data,
 			TransportAddress *taddr, int count)
@@ -168,7 +166,7 @@
 				 NIPQUAD(addr.ip), ntohs(port),
 				 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
 				 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
-			return set_h225_addr(pskb, data, 0, &taddr[i],
+			return set_h225_addr(skb, data, 0, &taddr[i],
 					     &ct->tuplehash[!dir].tuple.dst.u3,
 					     ct->tuplehash[!dir].tuple.
 								dst.u.udp.port);
@@ -179,7 +177,7 @@
 }
 
 /****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff,
 			H245_TransportAddress *taddr,
@@ -244,7 +242,7 @@
 	}
 
 	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, taddr,
+	if (set_h245_addr(skb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons((port & htons(1)) ? nated_port + 1 :
 						    nated_port)) == 0) {
@@ -273,7 +271,7 @@
 }
 
 /****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
 		    unsigned char **data, int dataoff,
 		    H245_TransportAddress *taddr, __be16 port,
@@ -301,7 +299,7 @@
 	}
 
 	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, taddr,
+	if (set_h245_addr(skb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) < 0) {
 		nf_ct_unexpect_related(exp);
@@ -318,7 +316,7 @@
 }
 
 /****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
 		    unsigned char **data, int dataoff,
 		    TransportAddress *taddr, __be16 port,
@@ -351,7 +349,7 @@
 	}
 
 	/* Modify signal */
-	if (set_h225_addr(pskb, data, dataoff, taddr,
+	if (set_h225_addr(skb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) == 0) {
 		/* Save ports */
@@ -406,7 +404,7 @@
 }
 
 /****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
 		    unsigned char **data, TransportAddress *taddr, int idx,
 		    __be16 port, struct nf_conntrack_expect *exp)
@@ -439,7 +437,7 @@
 	}
 
 	/* Modify signal */
-	if (set_h225_addr(pskb, data, 0, &taddr[idx],
+	if (set_h225_addr(skb, data, 0, &taddr[idx],
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) == 0) {
 		/* Save ports */
@@ -450,7 +448,7 @@
 		if (idx > 0 &&
 		    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
 		    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
-			set_h225_addr(pskb, data, 0, &taddr[0],
+			set_h225_addr(skb, data, 0, &taddr[0],
 				      &ct->tuplehash[!dir].tuple.dst.u3,
 				      info->sig_port[!dir]);
 		}
@@ -495,7 +493,7 @@
 }
 
 /****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 			      enum ip_conntrack_info ctinfo,
 			      unsigned char **data, int dataoff,
 			      TransportAddress *taddr, __be16 port,
@@ -525,7 +523,7 @@
 	}
 
 	/* Modify signal */
-	if (!set_h225_addr(pskb, data, dataoff, taddr,
+	if (!set_h225_addr(skb, data, dataoff, taddr,
 			   &ct->tuplehash[!dir].tuple.dst.u3,
 			   htons(nated_port)) == 0) {
 		nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 93d8a0a..8718da0 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -111,22 +111,14 @@
 }
 
 /* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 {
-	struct sk_buff *nskb;
-
-	if ((*pskb)->len + extra > 65535)
+	if (skb->len + extra > 65535)
 		return 0;
 
-	nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
-	if (!nskb)
+	if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
 		return 0;
 
-	/* Transfer socket to new skb. */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
 	return 1;
 }
 
@@ -139,7 +131,7 @@
  *
  * */
 int
-nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
+nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			 struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int match_offset,
@@ -147,37 +139,37 @@
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	if (rep_len > match_len &&
-	    rep_len - match_len > skb_tailroom(*pskb) &&
-	    !enlarge_skb(pskb, rep_len - match_len))
+	    rep_len - match_len > skb_tailroom(skb) &&
+	    !enlarge_skb(skb, rep_len - match_len))
 		return 0;
 
-	SKB_LINEAR_ASSERT(*pskb);
+	SKB_LINEAR_ASSERT(skb);
 
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 	tcph = (void *)iph + iph->ihl*4;
 
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+	oldlen = skb->len - iph->ihl*4;
+	mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
-	datalen = (*pskb)->len - iph->ihl*4;
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+	datalen = skb->len - iph->ihl*4;
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
-			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
-			(*pskb)->csum_start = skb_headroom(*pskb) +
-					      skb_network_offset(*pskb) +
-					      iph->ihl * 4;
-			(*pskb)->csum_offset = offsetof(struct tcphdr, check);
+		    skb->dev->features & NETIF_F_V4_CSUM) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  iph->ihl * 4;
+			skb->csum_offset = offsetof(struct tcphdr, check);
 			tcph->check = ~tcp_v4_check(datalen,
 						    iph->saddr, iph->daddr, 0);
 		} else {
@@ -188,7 +180,7 @@
 								datalen, 0));
 		}
 	} else
-		nf_proto_csum_replace2(&tcph->check, *pskb,
+		nf_proto_csum_replace2(&tcph->check, skb,
 				       htons(oldlen), htons(datalen), 1);
 
 	if (rep_len != match_len) {
@@ -197,7 +189,7 @@
 				    (int)rep_len - (int)match_len,
 				    ct, ctinfo);
 		/* Tell TCP window tracking about seq change */
-		nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
+		nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
 					ct, CTINFO2DIR(ctinfo));
 	}
 	return 1;
@@ -215,7 +207,7 @@
  *       should be fairly easy to do.
  */
 int
-nf_nat_mangle_udp_packet(struct sk_buff **pskb,
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int match_offset,
@@ -223,48 +215,48 @@
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = ip_hdr(*pskb);
-	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
+	iph = ip_hdr(skb);
+	if (skb->len < iph->ihl*4 + sizeof(*udph) +
 			       match_offset + match_len)
 		return 0;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	if (rep_len > match_len &&
-	    rep_len - match_len > skb_tailroom(*pskb) &&
-	    !enlarge_skb(pskb, rep_len - match_len))
+	    rep_len - match_len > skb_tailroom(skb) &&
+	    !enlarge_skb(skb, rep_len - match_len))
 		return 0;
 
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 	udph = (void *)iph + iph->ihl*4;
 
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+	oldlen = skb->len - iph->ihl*4;
+	mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	datalen = (*pskb)->len - iph->ihl*4;
+	datalen = skb->len - iph->ihl*4;
 	udph->len = htons(datalen);
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 		return 1;
 
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
-			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
-			(*pskb)->csum_start = skb_headroom(*pskb) +
-					      skb_network_offset(*pskb) +
-					      iph->ihl * 4;
-			(*pskb)->csum_offset = offsetof(struct udphdr, check);
+		    skb->dev->features & NETIF_F_V4_CSUM) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  iph->ihl * 4;
+			skb->csum_offset = offsetof(struct udphdr, check);
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 							 datalen, IPPROTO_UDP,
 							 0);
@@ -278,7 +270,7 @@
 				udph->check = CSUM_MANGLED_0;
 		}
 	} else
-		nf_proto_csum_replace2(&udph->check, *pskb,
+		nf_proto_csum_replace2(&udph->check, skb,
 				       htons(oldlen), htons(datalen), 1);
 
 	return 1;
@@ -330,7 +322,7 @@
 
 /* TCP SACK sequence number adjustment */
 static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff **pskb,
+nf_nat_sack_adjust(struct sk_buff *skb,
 		   struct tcphdr *tcph,
 		   struct nf_conn *ct,
 		   enum ip_conntrack_info ctinfo)
@@ -338,17 +330,17 @@
 	unsigned int dir, optoff, optend;
 	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
-	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
+	optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(skb) + tcph->doff * 4;
 
-	if (!skb_make_writable(pskb, optend))
+	if (!skb_make_writable(skb, optend))
 		return 0;
 
 	dir = CTINFO2DIR(ctinfo);
 
 	while (optoff < optend) {
 		/* Usually: option, length. */
-		unsigned char *op = (*pskb)->data + optoff;
+		unsigned char *op = skb->data + optoff;
 
 		switch (op[0]) {
 		case TCPOPT_EOL:
@@ -365,7 +357,7 @@
 			if (op[0] == TCPOPT_SACK &&
 			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
 			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(*pskb, tcph, optoff+2,
+				sack_adjust(skb, tcph, optoff+2,
 					    optoff+op[1], &nat->seq[!dir]);
 			optoff += op[1];
 		}
@@ -375,7 +367,7 @@
 
 /* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
 int
-nf_nat_seq_adjust(struct sk_buff **pskb,
+nf_nat_seq_adjust(struct sk_buff *skb,
 		  struct nf_conn *ct,
 		  enum ip_conntrack_info ctinfo)
 {
@@ -390,10 +382,10 @@
 	this_way = &nat->seq[dir];
 	other_way = &nat->seq[!dir];
 
-	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+	tcph = (void *)skb->data + ip_hdrlen(skb);
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
@@ -405,8 +397,8 @@
 	else
 		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
 
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
+	nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 
 	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
@@ -415,10 +407,10 @@
 	tcph->seq = newseq;
 	tcph->ack_seq = newack;
 
-	if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+	if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
 		return 0;
 
-	nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
+	nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
 
 	return 1;
 }
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index bcf274b..766e2c1 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -27,7 +27,7 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_nat_irc");
 
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int matchoff,
 			 unsigned int matchlen,
@@ -58,7 +58,7 @@
 	pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
 		 buffer, NIPQUAD(ip), port);
 
-	ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+	ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
 				       matchoff, matchlen, buffer,
 				       strlen(buffer));
 	if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 984ec83..e1385a0 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -110,7 +110,7 @@
 
 /* outbound packets == from PNS to PAC */
 static int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
 		  struct nf_conn *ct,
 		  enum ip_conntrack_info ctinfo,
 		  struct PptpControlHeader *ctlh,
@@ -175,7 +175,7 @@
 		 ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
 
 	/* mangle packet */
-	if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 				     cid_off + sizeof(struct pptp_pkt_hdr) +
 				     sizeof(struct PptpControlHeader),
 				     sizeof(new_callid), (char *)&new_callid,
@@ -213,7 +213,7 @@
 
 /* inbound packets == from PAC to PNS */
 static int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
 		 struct nf_conn *ct,
 		 enum ip_conntrack_info ctinfo,
 		 struct PptpControlHeader *ctlh,
@@ -268,7 +268,7 @@
 	pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
 		 ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
 
-	if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 				     pcid_off + sizeof(struct pptp_pkt_hdr) +
 				     sizeof(struct PptpControlHeader),
 				     sizeof(new_pcid), (char *)&new_pcid,
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d562290..b820f99 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -98,21 +98,21 @@
 
 /* manipulate a GRE packet according to maniptype */
 static int
-gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
 	struct gre_hdr *greh;
 	struct gre_hdr_pptp *pgreh;
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	unsigned int hdroff = iphdroff + iph->ihl * 4;
 
 	/* pgreh includes two optional 32bit fields which are not required
 	 * to be there.  That's where the magic '8' comes from */
-	if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8))
+	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
 		return 0;
 
-	greh = (void *)(*pskb)->data + hdroff;
+	greh = (void *)skb->data + hdroff;
 	pgreh = (struct gre_hdr_pptp *)greh;
 
 	/* we only have destination manip of a packet, since 'source key'
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 898d737..b9fc724 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -52,20 +52,20 @@
 }
 
 static int
-icmp_manip_pkt(struct sk_buff **pskb,
+icmp_manip_pkt(struct sk_buff *skb,
 	       unsigned int iphdroff,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
 {
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct icmphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return 0;
 
-	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-	nf_proto_csum_replace2(&hdr->checksum, *pskb,
+	hdr = (struct icmphdr *)(skb->data + hdroff);
+	nf_proto_csum_replace2(&hdr->checksum, skb,
 			       hdr->un.echo.id, tuple->src.u.icmp.id, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 5bbbb2a..6bab2e18 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -88,12 +88,12 @@
 }
 
 static int
-tcp_manip_pkt(struct sk_buff **pskb,
+tcp_manip_pkt(struct sk_buff *skb,
 	      unsigned int iphdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct tcphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 	__be32 oldip, newip;
@@ -103,14 +103,14 @@
 	/* this could be a inner header returned in icmp packet; in such
 	   cases we cannot update the checksum field since it is outside of
 	   the 8 bytes of transport layer headers we are guaranteed */
-	if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+	if (skb->len >= hdroff + sizeof(struct tcphdr))
 		hdrsize = sizeof(struct tcphdr);
 
-	if (!skb_make_writable(pskb, hdroff + hdrsize))
+	if (!skb_make_writable(skb, hdroff + hdrsize))
 		return 0;
 
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+	iph = (struct iphdr *)(skb->data + iphdroff);
+	hdr = (struct tcphdr *)(skb->data + hdroff);
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
@@ -132,8 +132,8 @@
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-	nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
+	nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+	nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index a0af4fd..cbf1a61 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -86,22 +86,22 @@
 }
 
 static int
-udp_manip_pkt(struct sk_buff **pskb,
+udp_manip_pkt(struct sk_buff *skb,
 	      unsigned int iphdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct udphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 	__be32 oldip, newip;
 	__be16 *portptr, newport;
 
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return 0;
 
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct udphdr *)((*pskb)->data + hdroff);
+	iph = (struct iphdr *)(skb->data + iphdroff);
+	hdr = (struct udphdr *)(skb->data + hdroff);
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
@@ -116,9 +116,9 @@
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
-		nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-		nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport,
+	if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+		nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
 				       0);
 		if (!hdr->check)
 			hdr->check = CSUM_MANGLED_0;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index f50d020..cfd2742 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -37,7 +37,7 @@
 }
 
 static int
-unknown_manip_pkt(struct sk_buff **pskb,
+unknown_manip_pkt(struct sk_buff *skb,
 		  unsigned int iphdroff,
 		  const struct nf_conntrack_tuple *tuple,
 		  enum nf_nat_manip_type maniptype)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 76ec59a..46b25ab 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -65,7 +65,7 @@
 };
 
 /* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
+static unsigned int ipt_snat_target(struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
@@ -78,7 +78,7 @@
 
 	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
@@ -107,7 +107,7 @@
 	ip_rt_put(rt);
 }
 
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+static unsigned int ipt_dnat_target(struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
@@ -121,14 +121,14 @@
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 		     hooknum == NF_IP_LOCAL_OUT);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	if (hooknum == NF_IP_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
+		warn_if_extra_mangle(ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
@@ -204,7 +204,7 @@
 	return nf_nat_setup_info(ct, &range, hooknum);
 }
 
-int nf_nat_rule_find(struct sk_buff **pskb,
+int nf_nat_rule_find(struct sk_buff *skb,
 		     unsigned int hooknum,
 		     const struct net_device *in,
 		     const struct net_device *out,
@@ -212,7 +212,7 @@
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
+	ret = ipt_do_table(skb, hooknum, in, out, &nat_table);
 
 	if (ret == NF_ACCEPT) {
 		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e14d419..ce9edbc 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -60,7 +60,7 @@
 	}
 }
 
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
+static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo,
 			struct nf_conn *ct, const char **dptr, size_t dlen,
 			enum sip_header_pos pos, struct addr_map *map)
 {
@@ -84,15 +84,15 @@
 	} else
 		return 1;
 
-	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+	if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 				      matchoff, matchlen, addr, addrlen))
 		return 0;
-	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	*dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
 	return 1;
 
 }
 
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
+static unsigned int ip_nat_sip(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       struct nf_conn *ct,
 			       const char **dptr)
@@ -101,8 +101,8 @@
 	struct addr_map map;
 	int dataoff, datalen;
 
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	datalen = (*pskb)->len - dataoff;
+	dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+	datalen = skb->len - dataoff;
 	if (datalen < sizeof("SIP/2.0") - 1)
 		return NF_ACCEPT;
 
@@ -121,19 +121,19 @@
 		else
 			pos = POS_REQ_URI;
 
-		if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
+		if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map))
 			return NF_DROP;
 	}
 
-	if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
+	if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
+	    !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
+	    !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
+	    !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
 		return NF_DROP;
 	return NF_ACCEPT;
 }
 
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
+static unsigned int mangle_sip_packet(struct sk_buff *skb,
 				      enum ip_conntrack_info ctinfo,
 				      struct nf_conn *ct,
 				      const char **dptr, size_t dlen,
@@ -145,16 +145,16 @@
 	if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
 		return 0;
 
-	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+	if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 				      matchoff, matchlen, buffer, bufflen))
 		return 0;
 
 	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	*dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
 	return 1;
 }
 
-static int mangle_content_len(struct sk_buff **pskb,
+static int mangle_content_len(struct sk_buff *skb,
 			      enum ip_conntrack_info ctinfo,
 			      struct nf_conn *ct,
 			      const char *dptr)
@@ -163,22 +163,22 @@
 	char buffer[sizeof("65536")];
 	int bufflen;
 
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
 
 	/* Get actual SDP lenght */
-	if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+	if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
 			    &matchlen, POS_SDP_HEADER) > 0) {
 
 		/* since ct_sip_get_info() give us a pointer passing 'v='
 		   we need to add 2 bytes in this count. */
-		int c_len = (*pskb)->len - dataoff - matchoff + 2;
+		int c_len = skb->len - dataoff - matchoff + 2;
 
 		/* Now, update SDP length */
-		if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+		if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
 				    &matchlen, POS_CONTENT) > 0) {
 
 			bufflen = sprintf(buffer, "%u", c_len);
-			return nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+			return nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 							matchoff, matchlen,
 							buffer, bufflen);
 		}
@@ -186,7 +186,7 @@
 	return 0;
 }
 
-static unsigned int mangle_sdp(struct sk_buff **pskb,
+static unsigned int mangle_sdp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       struct nf_conn *ct,
 			       __be32 newip, u_int16_t port,
@@ -195,25 +195,25 @@
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
 	unsigned int dataoff, bufflen;
 
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
 
 	/* Mangle owner and contact info. */
 	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+	if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
 			       buffer, bufflen, POS_OWNER_IP4))
 		return 0;
 
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+	if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
 			       buffer, bufflen, POS_CONNECTION_IP4))
 		return 0;
 
 	/* Mangle media port. */
 	bufflen = sprintf(buffer, "%u", port);
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+	if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
 			       buffer, bufflen, POS_MEDIA))
 		return 0;
 
-	return mangle_content_len(pskb, ctinfo, ct, dptr);
+	return mangle_content_len(skb, ctinfo, ct, dptr);
 }
 
 static void ip_nat_sdp_expect(struct nf_conn *ct,
@@ -241,7 +241,7 @@
 
 /* So, this packet has hit the connection tracking matching code.
    Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
+static unsigned int ip_nat_sdp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       struct nf_conntrack_expect *exp,
 			       const char *dptr)
@@ -277,7 +277,7 @@
 	if (port == 0)
 		return NF_DROP;
 
-	if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
+	if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
 		nf_ct_unexpect_related(exp);
 		return NF_DROP;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 6bfcd3a..03709d6 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1188,9 +1188,9 @@
  */
 static int snmp_translate(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
-			  struct sk_buff **pskb)
+			  struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1225,13 +1225,13 @@
 
 /* We don't actually set up expectations, just adjust internal IP
  * addresses if this is being NATted */
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
 		struct nf_conn *ct,
 		enum ip_conntrack_info ctinfo)
 {
 	int dir = CTINFO2DIR(ctinfo);
 	unsigned int ret;
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
@@ -1250,7 +1250,7 @@
 	 * enough room for a UDP header.  Just verify the UDP length field so we
 	 * can mess around with the payload.
 	 */
-	if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+	if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
 		 if (net_ratelimit())
 			 printk(KERN_WARNING "SNMP: dropping malformed packet "
 				"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
@@ -1258,11 +1258,11 @@
 		 return NF_DROP;
 	}
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return NF_DROP;
 
 	spin_lock_bh(&snmp_lock);
-	ret = snmp_translate(ct, ctinfo, pskb);
+	ret = snmp_translate(ct, ctinfo, skb);
 	spin_unlock_bh(&snmp_lock);
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 46cc99d..7db76ea 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -67,7 +67,7 @@
 
 static unsigned int
 nf_nat_fn(unsigned int hooknum,
-	  struct sk_buff **pskb,
+	  struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
@@ -80,9 +80,9 @@
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and nf_nat_out protects post-routing. */
-	NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
+	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
 	   packet filter it out, or implement conntrack/NAT for that
@@ -91,10 +91,10 @@
 		/* Exception: ICMP redirect to new connection (not in
 		   hash table yet).  We must not let this through, in
 		   case we're doing NAT to the same network. */
-		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
-			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+			hp = skb_header_pointer(skb, ip_hdrlen(skb),
 						sizeof(_hdr), &_hdr);
 			if (hp != NULL &&
 			    hp->type == ICMP_REDIRECT)
@@ -119,9 +119,9 @@
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
-							   hooknum, pskb))
+							   hooknum, skb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -141,7 +141,7 @@
 				/* LOCAL_IN hook doesn't have a chain!  */
 				ret = alloc_null_binding(ct, hooknum);
 			else
-				ret = nf_nat_rule_find(pskb, hooknum, in, out,
+				ret = nf_nat_rule_find(skb, hooknum, in, out,
 						       ct);
 
 			if (ret != NF_ACCEPT) {
@@ -159,31 +159,31 @@
 			     ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
 	}
 
-	return nf_nat_packet(ct, ctinfo, hooknum, pskb);
+	return nf_nat_packet(ct, ctinfo, hooknum, skb);
 }
 
 static unsigned int
 nf_nat_in(unsigned int hooknum,
-	  struct sk_buff **pskb,
+	  struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	__be32 daddr = ip_hdr(*pskb)->daddr;
+	__be32 daddr = ip_hdr(skb)->daddr;
 
-	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != ip_hdr(*pskb)->daddr) {
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = NULL;
+	    daddr != ip_hdr(skb)->daddr) {
+		dst_release(skb->dst);
+		skb->dst = NULL;
 	}
 	return ret;
 }
 
 static unsigned int
 nf_nat_out(unsigned int hooknum,
-	   struct sk_buff **pskb,
+	   struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
@@ -195,14 +195,14 @@
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.src.u3.ip !=
@@ -210,7 +210,7 @@
 		    || ct->tuplehash[dir].tuple.src.u.all !=
 		       ct->tuplehash[!dir].tuple.dst.u.all
 		    )
-			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
+			return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
 	}
 #endif
 	return ret;
@@ -218,7 +218,7 @@
 
 static unsigned int
 nf_nat_local_fn(unsigned int hooknum,
-		struct sk_buff **pskb,
+		struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		int (*okfn)(struct sk_buff *))
@@ -228,24 +228,24 @@
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
 				ret = NF_DROP;
 		}
 #ifdef CONFIG_XFRM
 		else if (ct->tuplehash[dir].tuple.dst.u.all !=
 			 ct->tuplehash[!dir].tuple.src.u.all)
-			if (ip_xfrm_me_harder(pskb))
+			if (ip_xfrm_me_harder(skb))
 				ret = NF_DROP;
 #endif
 	}
@@ -254,7 +254,7 @@
 
 static unsigned int
 nf_nat_adjust(unsigned int hooknum,
-	      struct sk_buff **pskb,
+	      struct sk_buff *skb,
 	      const struct net_device *in,
 	      const struct net_device *out,
 	      int (*okfn)(struct sk_buff *))
@@ -262,10 +262,10 @@
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
 		pr_debug("nf_nat_standalone: adjusting sequence number\n");
-		if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
+		if (!nf_nat_seq_adjust(skb, ct, ctinfo))
 			return NF_DROP;
 	}
 	return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 04dfeae..0ecec70 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -20,7 +20,7 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_nat_tftp");
 
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
 			 struct nf_conntrack_expect *exp)
 {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e5b05b0..fd16cb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -70,8 +70,8 @@
 	seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
 	seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
-	seq_printf(seq,  "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
-		   atomic_read(&ip_frag_mem));
+	seq_printf(seq,  "FRAG: inuse %d memory %d\n",
+			ip_frag_nqueues(), ip_frag_mem());
 	return 0;
 }
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index eb286ab..c98ef16 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
 #include <net/route.h>
 #include <net/tcp.h>
 #include <net/cipso_ipv4.h>
+#include <net/inet_frag.h>
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
@@ -357,7 +358,7 @@
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
-		.data		= &sysctl_ipfrag_high_thresh,
+		.data		= &ip4_frags_ctl.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -365,7 +366,7 @@
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
 		.procname	= "ipfrag_low_thresh",
-		.data		= &sysctl_ipfrag_low_thresh,
+		.data		= &ip4_frags_ctl.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -381,7 +382,7 @@
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_TIME,
 		.procname	= "ipfrag_time",
-		.data		= &sysctl_ipfrag_time,
+		.data		= &ip4_frags_ctl.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -732,7 +733,7 @@
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
 		.procname	= "ipfrag_secret_interval",
-		.data		= &sysctl_ipfrag_secret_interval,
+		.data		= &ip4_frags_ctl.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0a42e93..0f00966 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1995,8 +1995,7 @@
 }
 
 /* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk,
-			       int packets, u32 high_seq)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -2019,7 +2018,7 @@
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 		cnt += tcp_skb_pcount(skb);
-		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2040,9 +2039,9 @@
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost, tp->high_seq);
+		tcp_mark_head_lost(sk, lost);
 	} else {
-		tcp_mark_head_lost(sk, 1, tp->high_seq);
+		tcp_mark_head_lost(sk, 1);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -2381,7 +2380,7 @@
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 434ef30..a4edd66 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -78,7 +78,7 @@
 	while (likely((err = xfrm4_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
-		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
 			      skb->dst->dev, dst_output);
 		if (unlikely(err != 1))
 			break;
@@ -86,7 +86,7 @@
 		if (!skb->dst->xfrm)
 			return dst_output(skb);
 
-		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
 			      skb->dst->dev, xfrm4_output_finish2);
 		if (unlikely(err != 1))
 			break;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index c82d4d4..1e89efd 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -102,7 +102,7 @@
 
 struct tlvtype_proc {
 	int	type;
-	int	(*func)(struct sk_buff **skbp, int offset);
+	int	(*func)(struct sk_buff *skb, int offset);
 };
 
 /*********************
@@ -111,10 +111,8 @@
 
 /* An unknown option is detected, decide what to do */
 
-static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
-
 	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
@@ -139,9 +137,8 @@
 
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
 	const unsigned char *nh = skb_network_header(skb);
 	int off = skb_network_header_len(skb);
@@ -172,13 +169,13 @@
 					/* type specific length/alignment
 					   checks will be performed in the
 					   func(). */
-					if (curr->func(skbp, off) == 0)
+					if (curr->func(skb, off) == 0)
 						return 0;
 					break;
 				}
 			}
 			if (curr->type < 0) {
-				if (ip6_tlvopt_unknown(skbp, off) == 0)
+				if (ip6_tlvopt_unknown(skb, off) == 0)
 					return 0;
 			}
 			break;
@@ -198,9 +195,8 @@
  *****************************/
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -234,22 +230,13 @@
 		goto discard;
 
 	if (skb_cloned(skb)) {
-		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
-		struct inet6_skb_parm *opt2;
-
-		if (skb2 == NULL)
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 			goto discard;
 
-		opt2 = IP6CB(skb2);
-		memcpy(opt2, opt, sizeof(*opt2));
-
-		kfree_skb(skb);
-
 		/* update all variable using below by copied skbuff */
-		*skbp = skb = skb2;
-		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
 						  optoff);
-		ipv6h = ipv6_hdr(skb2);
+		ipv6h = ipv6_hdr(skb);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -280,9 +267,8 @@
 	{-1,			NULL}
 };
 
-static int ipv6_destopt_rcv(struct sk_buff **skbp)
+static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16 dstbuf;
@@ -304,9 +290,8 @@
 #endif
 
 	dst = dst_clone(skb->dst);
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
 		dst_release(dst);
-		skb = *skbp;
 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -337,10 +322,8 @@
   NONE header. No data in packet.
  ********************************/
 
-static int ipv6_nodata_rcv(struct sk_buff **skbp)
+static int ipv6_nodata_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
-
 	kfree_skb(skb);
 	return 0;
 }
@@ -360,9 +343,8 @@
   Routing header.
  ********************************/
 
-static int ipv6_rthdr_rcv(struct sk_buff **skbp)
+static int ipv6_rthdr_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
@@ -464,18 +446,14 @@
 	   Do not damage packets queued somewhere.
 	 */
 	if (skb_cloned(skb)) {
-		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
 		/* the copy is a forwarded packet */
-		if (skb2 == NULL) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_OUTDISCARDS);
 			kfree_skb(skb);
 			return -1;
 		}
-		kfree_skb(skb);
-		*skbp = skb = skb2;
-		opt = IP6CB(skb2);
-		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
+		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -578,9 +556,8 @@
 
 /* Router Alert as of RFC 2711 */
 
-static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
 	const unsigned char *nh = skb_network_header(skb);
 
 	if (nh[optoff + 1] == 2) {
@@ -595,9 +572,8 @@
 
 /* Jumbo payload */
 
-static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
 	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
 
@@ -648,9 +624,8 @@
 	{ -1, }
 };
 
-int ipv6_parse_hopopts(struct sk_buff **skbp)
+int ipv6_parse_hopopts(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
@@ -667,8 +642,7 @@
 	}
 
 	opt->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
-		skb = *skbp;
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 47b8ce2..9bb031f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -82,7 +82,7 @@
 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
 #define icmpv6_socket	__get_cpu_var(__icmpv6_socket)
 
-static int icmpv6_rcv(struct sk_buff **pskb);
+static int icmpv6_rcv(struct sk_buff *skb);
 
 static struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
@@ -614,9 +614,8 @@
  *	Handle icmp messages
  */
 
-static int icmpv6_rcv(struct sk_buff **pskb)
+static int icmpv6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct in6_addr *saddr, *daddr;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 25b9317..78de42a 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -146,7 +146,7 @@
 	__ip6_dst_store(sk, dst, daddr, saddr);
 
 #ifdef CONFIG_XFRM
-	if (dst) {
+	{
 		struct rt6_info *rt = (struct rt6_info  *)dst;
 		rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
 	}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9149fc2..fac6f7f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -125,7 +125,7 @@
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		if (ipv6_parse_hopopts(&skb) < 0) {
+		if (ipv6_parse_hopopts(skb) < 0) {
 			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 			rcu_read_unlock();
 			return 0;
@@ -149,7 +149,7 @@
  */
 
 
-static inline int ip6_input_finish(struct sk_buff *skb)
+static int ip6_input_finish(struct sk_buff *skb)
 {
 	struct inet6_protocol *ipprot;
 	struct sock *raw_sk;
@@ -199,7 +199,7 @@
 		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto discard;
 
-		ret = ipprot->handler(&skb);
+		ret = ipprot->handler(skb);
 		if (ret > 0)
 			goto resubmit;
 		else if (ret == 0)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 011082e..13565df 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -70,7 +70,7 @@
 	spin_unlock_bh(&ip6_id_lock);
 }
 
-static inline int ip6_output_finish(struct sk_buff *skb)
+static int ip6_output_finish(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 38b1496..b1326c2 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,15 +68,15 @@
 	}
 }
 
-static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
 {
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = ipv6_hdr(*pskb);
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
-			return ip6_route_me_harder(*pskb);
+			return ip6_route_me_harder(skb);
 	}
 	return 0;
 }
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 0473145..6413a30 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -332,6 +332,7 @@
 ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 {
 	int diff;
+	int err;
 	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
 
 	if (v->data_len < sizeof(*user_iph))
@@ -344,25 +345,18 @@
 		if (v->data_len > 0xFFFF)
 			return -EINVAL;
 		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-
-			newskb = skb_copy_expand(e->skb,
-						 skb_headroom(e->skb),
-						 diff,
-						 GFP_ATOMIC);
-			if (newskb == NULL) {
+			err = pskb_expand_head(e->skb, 0,
+					       diff - skb_tailroom(e->skb),
+					       GFP_ATOMIC);
+			if (err) {
 				printk(KERN_WARNING "ip6_queue: OOM "
 				      "in mangle, dropping packet\n");
-				return -ENOMEM;
+				return err;
 			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(&e->skb, v->data_len))
+	if (!skb_make_writable(e->skb, v->data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cd9df02..acaba15 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -205,7 +205,7 @@
 }
 
 static unsigned int
-ip6t_error(struct sk_buff **pskb,
+ip6t_error(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -350,7 +350,7 @@
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ip6t_do_table(struct sk_buff **pskb,
+ip6t_do_table(struct sk_buff *skb,
 	      unsigned int hook,
 	      const struct net_device *in,
 	      const struct net_device *out,
@@ -389,17 +389,17 @@
 	do {
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
-		if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
+		if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
 			&protoff, &offset, &hotdrop)) {
 			struct ip6t_entry_target *t;
 
 			if (IP6T_MATCH_ITERATE(e, do_match,
-					       *pskb, in, out,
+					       skb, in, out,
 					       offset, protoff, &hotdrop) != 0)
 				goto no_match;
 
 			ADD_COUNTER(e->counters,
-				    ntohs(ipv6_hdr(*pskb)->payload_len)
+				    ntohs(ipv6_hdr(skb)->payload_len)
 				    + IPV6_HDR_LEN,
 				    1);
 
@@ -409,8 +409,8 @@
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 			/* The packet is traced: log it */
-			if (unlikely((*pskb)->nf_trace))
-				trace_packet(*pskb, hook, in, out,
+			if (unlikely(skb->nf_trace))
+				trace_packet(skb, hook, in, out,
 					     table->name, private, e);
 #endif
 			/* Standard target? */
@@ -448,7 +448,7 @@
 				((struct ip6t_entry *)table_base)->comefrom
 					= 0xeeeeeeec;
 #endif
-				verdict = t->u.kernel.target->target(pskb,
+				verdict = t->u.kernel.target->target(skb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ad4d943..9afc836 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -18,7 +18,7 @@
 MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
 MODULE_LICENSE("GPL");
 
-static unsigned int ip6t_hl_target(struct sk_buff **pskb,
+static unsigned int ip6t_hl_target(struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   unsigned int hooknum,
@@ -29,10 +29,10 @@
 	const struct ip6t_HL_info *info = targinfo;
 	int new_hl;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return NF_DROP;
 
-	ip6h = ipv6_hdr(*pskb);
+	ip6h = ipv6_hdr(skb);
 
 	switch (info->mode) {
 		case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 6ab9900..7a48c34 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -431,7 +431,7 @@
 }
 
 static unsigned int
-ip6t_log_target(struct sk_buff **pskb,
+ip6t_log_target(struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		unsigned int hooknum,
@@ -445,8 +445,7 @@
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
-			loginfo->prefix);
+	ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix);
 	return XT_CONTINUE;
 }
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 3fd08d5..1a7d291 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -172,7 +172,7 @@
 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
 }
 
-static unsigned int reject6_target(struct sk_buff **pskb,
+static unsigned int reject6_target(struct sk_buff *skb,
 			   const struct net_device *in,
 			   const struct net_device *out,
 			   unsigned int hooknum,
@@ -187,25 +187,25 @@
 	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
-		send_unreach(*pskb, ICMPV6_NOROUTE, hooknum);
+		send_unreach(skb, ICMPV6_NOROUTE, hooknum);
 		break;
 	case IP6T_ICMP6_ADM_PROHIBITED:
-		send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum);
+		send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum);
 		break;
 	case IP6T_ICMP6_NOT_NEIGHBOUR:
-		send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum);
+		send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
 		break;
 	case IP6T_ICMP6_ADDR_UNREACH:
-		send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum);
+		send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum);
 		break;
 	case IP6T_ICMP6_PORT_UNREACH:
-		send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum);
+		send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum);
 		break;
 	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
 		break;
 	case IP6T_TCP_RESET:
-		send_reset(*pskb);
+		send_reset(skb);
 		break;
 	default:
 		if (net_ratelimit())
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 7e32e2a..1d26b20 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -60,32 +60,32 @@
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+	return ip6t_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
 ip6t_local_out_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
 #if 0
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+	return ip6t_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index f0a9efa..a0b6381 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -68,17 +68,17 @@
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_route_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+	return ip6t_do_table(skb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
 ip6t_local_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
@@ -91,8 +91,8 @@
 
 #if 0
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
@@ -100,22 +100,22 @@
 #endif
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority,  */
-	memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
-	memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
-	mark = (*pskb)->mark;
-	hop_limit = ipv6_hdr(*pskb)->hop_limit;
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
 
 	/* flowlabel and prio (includes version, which shouldn't change either */
-	flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
+	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+	ret = ip6t_do_table(skb, hook, in, out, &packet_mangler);
 
 	if (ret != NF_DROP && ret != NF_STOLEN
-		&& (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
-		    || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
-		    || (*pskb)->mark != mark
-		    || ipv6_hdr(*pskb)->hop_limit != hop_limit))
-		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
+		&& (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
+		    || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr))
+		    || skb->mark != mark
+		    || ipv6_hdr(skb)->hop_limit != hop_limit))
+		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index ec290e4..8f7109f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -46,12 +46,12 @@
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_raw);
+	return ip6t_do_table(skb, hook, in, out, &packet_raw);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 37a3db9..0e40948 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -18,6 +18,7 @@
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
 #include <net/ipv6.h>
+#include <net/inet_frag.h>
 
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -145,7 +146,7 @@
 }
 
 static unsigned int ipv6_confirm(unsigned int hooknum,
-				 struct sk_buff **pskb,
+				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
 				 int (*okfn)(struct sk_buff *))
@@ -155,12 +156,12 @@
 	struct nf_conntrack_helper *helper;
 	enum ip_conntrack_info ctinfo;
 	unsigned int ret, protoff;
-	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
-	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+	unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
 
 
 	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
 		goto out;
 
@@ -172,23 +173,23 @@
 	if (!helper)
 		goto out;
 
-	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
-					 (*pskb)->len - extoff);
-	if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) {
+	protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
+					 skb->len - extoff);
+	if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
 		pr_debug("proto header not found\n");
 		return NF_ACCEPT;
 	}
 
-	ret = helper->help(pskb, protoff, ct, ctinfo);
+	ret = helper->help(skb, protoff, ct, ctinfo);
 	if (ret != NF_ACCEPT)
 		return ret;
 out:
 	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(pskb);
+	return nf_conntrack_confirm(skb);
 }
 
 static unsigned int ipv6_defrag(unsigned int hooknum,
-				struct sk_buff **pskb,
+				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
 				int (*okfn)(struct sk_buff *))
@@ -196,17 +197,17 @@
 	struct sk_buff *reasm;
 
 	/* Previously seen (loopback)?  */
-	if ((*pskb)->nfct)
+	if (skb->nfct)
 		return NF_ACCEPT;
 
-	reasm = nf_ct_frag6_gather(*pskb);
+	reasm = nf_ct_frag6_gather(skb);
 
 	/* queued */
 	if (reasm == NULL)
 		return NF_STOLEN;
 
 	/* error occured or not fragmented */
-	if (reasm == *pskb)
+	if (reasm == skb)
 		return NF_ACCEPT;
 
 	nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
@@ -216,12 +217,12 @@
 }
 
 static unsigned int ipv6_conntrack_in(unsigned int hooknum,
-				      struct sk_buff **pskb,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *reasm = (*pskb)->nfct_reasm;
+	struct sk_buff *reasm = skb->nfct_reasm;
 
 	/* This packet is fragmented and has reassembled packet. */
 	if (reasm) {
@@ -229,32 +230,32 @@
 		if (!reasm->nfct) {
 			unsigned int ret;
 
-			ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
+			ret = nf_conntrack_in(PF_INET6, hooknum, reasm);
 			if (ret != NF_ACCEPT)
 				return ret;
 		}
 		nf_conntrack_get(reasm->nfct);
-		(*pskb)->nfct = reasm->nfct;
-		(*pskb)->nfctinfo = reasm->nfctinfo;
+		skb->nfct = reasm->nfct;
+		skb->nfctinfo = reasm->nfctinfo;
 		return NF_ACCEPT;
 	}
 
-	return nf_conntrack_in(PF_INET6, hooknum, pskb);
+	return nf_conntrack_in(PF_INET6, hooknum, skb);
 }
 
 static unsigned int ipv6_conntrack_local(unsigned int hooknum,
-					 struct sk_buff **pskb,
+					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
 					 int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+	if (skb->len < sizeof(struct ipv6hdr)) {
 		if (net_ratelimit())
 			printk("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
+	return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] = {
@@ -307,7 +308,7 @@
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_ct_frag6_timeout,
+		.data		= &nf_frags_ctl.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -315,7 +316,7 @@
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_ct_frag6_low_thresh,
+		.data		= &nf_frags_ctl.low_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -323,7 +324,7 @@
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_ct_frag6_high_thresh,
+		.data		= &nf_frags_ctl.high_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 25442a8..726fafd 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -31,6 +31,7 @@
 
 #include <net/sock.h>
 #include <net/snmp.h>
+#include <net/inet_frag.h>
 
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -48,10 +49,6 @@
 #define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
 
-unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
-unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
-unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
-
 struct nf_ct_frag6_skb_cb
 {
 	struct inet6_skb_parm	h;
@@ -63,51 +60,24 @@
 
 struct nf_ct_frag6_queue
 {
-	struct hlist_node	list;
-	struct list_head	lru_list;	/* lru list member	*/
+	struct inet_frag_queue	q;
 
 	__be32			id;		/* fragment id		*/
 	struct in6_addr		saddr;
 	struct in6_addr		daddr;
 
-	spinlock_t		lock;
-	atomic_t		refcnt;
-	struct timer_list	timer;		/* expire timer		*/
-	struct sk_buff		*fragments;
-	int			len;
-	int			meat;
-	ktime_t			stamp;
 	unsigned int		csum;
-	__u8			last_in;	/* has first/last segment arrived? */
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
 	__u16			nhoffset;
 };
 
-/* Hash table. */
+struct inet_frags_ctl nf_frags_ctl __read_mostly = {
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
+	.timeout	 = IPV6_FRAG_TIMEOUT,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-#define FRAG6Q_HASHSZ	64
-
-static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ];
-static DEFINE_RWLOCK(nf_ct_frag6_lock);
-static u32 nf_ct_frag6_hash_rnd;
-static LIST_HEAD(nf_ct_frag6_lru_list);
-int nf_ct_frag6_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
-{
-	hlist_del(&fq->list);
-	list_del(&fq->lru_list);
-	nf_ct_frag6_nqueues--;
-}
-
-static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
-{
-	write_lock(&nf_ct_frag6_lock);
-	__fq_unlink(fq);
-	write_unlock(&nf_ct_frag6_lock);
-}
+static struct inet_frags nf_frags;
 
 static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 			       struct in6_addr *daddr)
@@ -120,7 +90,7 @@
 
 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += nf_ct_frag6_hash_rnd;
+	c += nf_frags.rnd;
 	__jhash_mix(a, b, c);
 
 	a += (__force u32)saddr->s6_addr32[3];
@@ -133,100 +103,54 @@
 	c += (__force u32)id;
 	__jhash_mix(a, b, c);
 
-	return c & (FRAG6Q_HASHSZ - 1);
+	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list nf_ct_frag6_secret_timer;
-int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
-
-static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
+	struct nf_ct_frag6_queue *nq;
 
-	write_lock(&nf_ct_frag6_lock);
-	get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
-	for (i = 0; i < FRAG6Q_HASHSZ; i++) {
-		struct nf_ct_frag6_queue *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) {
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-			if (hval != i) {
-				hlist_del(&q->list);
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list,
-					       &nf_ct_frag6_hash[hval]);
-			}
-		}
-	}
-	write_unlock(&nf_ct_frag6_lock);
-
-	mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
+	nq = container_of(q, struct nf_ct_frag6_queue, q);
+	return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
 }
 
-atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
+static void nf_skb_free(struct sk_buff *skb)
+{
+	if (NFCT_FRAG6_CB(skb)->orig)
+		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
 
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &nf_ct_frag6_mem);
-	if (NFCT_FRAG6_CB(skb)->orig)
-		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-
+	atomic_sub(skb->truesize, &nf_frags.mem);
+	nf_skb_free(skb);
 	kfree_skb(skb);
 }
 
-static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
-				   unsigned int *work)
+static void nf_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct nf_ct_frag6_queue);
-	atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
-	kfree(fq);
+	kfree(container_of(q, struct nf_ct_frag6_queue, q));
 }
 
 static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
 {
-	struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+	struct nf_ct_frag6_queue *fq;
 
-	if (!fq)
+	fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+	if (fq == NULL)
 		return NULL;
-	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
+	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
 	return fq;
 }
 
 /* Destruction primitives. */
 
-/* Complete destruction of fq. */
-static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
-				unsigned int *work)
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
 {
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
-{
-	if (atomic_dec_and_test(&fq->refcnt))
-		nf_ct_frag6_destroy(fq, work);
+	inet_frag_put(&fq->q, &nf_frags);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -234,62 +158,28 @@
  */
 static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 {
-	if (del_timer(&fq->timer))
-		atomic_dec(&fq->refcnt);
-
-	if (!(fq->last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->refcnt);
-		fq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&fq->q, &nf_frags);
 }
 
 static void nf_ct_frag6_evictor(void)
 {
-	struct nf_ct_frag6_queue *fq;
-	struct list_head *tmp;
-	unsigned int work;
-
-	work = atomic_read(&nf_ct_frag6_mem);
-	if (work <= nf_ct_frag6_low_thresh)
-		return;
-
-	work -= nf_ct_frag6_low_thresh;
-	while (work > 0) {
-		read_lock(&nf_ct_frag6_lock);
-		if (list_empty(&nf_ct_frag6_lru_list)) {
-			read_unlock(&nf_ct_frag6_lock);
-			return;
-		}
-		tmp = nf_ct_frag6_lru_list.next;
-		BUG_ON(tmp == NULL);
-		fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
-		atomic_inc(&fq->refcnt);
-		read_unlock(&nf_ct_frag6_lock);
-
-		spin_lock(&fq->lock);
-		if (!(fq->last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->lock);
-
-		fq_put(fq, &work);
-	}
+	inet_frag_evictor(&nf_frags);
 }
 
 static void nf_ct_frag6_expire(unsigned long data)
 {
 	struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & COMPLETE)
 		goto out;
 
 	fq_kill(fq);
 
 out:
-	spin_unlock(&fq->lock);
-	fq_put(fq, NULL);
+	spin_unlock(&fq->q.lock);
+	fq_put(fq);
 }
 
 /* Creation primitives. */
@@ -302,31 +192,31 @@
 	struct hlist_node *n;
 #endif
 
-	write_lock(&nf_ct_frag6_lock);
+	write_lock(&nf_frags.lock);
 #ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
 		if (fq->id == fq_in->id &&
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
 		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			write_unlock(&nf_ct_frag6_lock);
-			fq_in->last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
+			atomic_inc(&fq->q.refcnt);
+			write_unlock(&nf_frags.lock);
+			fq_in->q.last_in |= COMPLETE;
+			fq_put(fq_in);
 			return fq;
 		}
 	}
 #endif
 	fq = fq_in;
 
-	if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
-		atomic_inc(&fq->refcnt);
+	if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout))
+		atomic_inc(&fq->q.refcnt);
 
-	atomic_inc(&fq->refcnt);
-	hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]);
-	INIT_LIST_HEAD(&fq->lru_list);
-	list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
-	nf_ct_frag6_nqueues++;
-	write_unlock(&nf_ct_frag6_lock);
+	atomic_inc(&fq->q.refcnt);
+	hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
+	INIT_LIST_HEAD(&fq->q.lru_list);
+	list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
+	nf_frags.nqueues++;
+	write_unlock(&nf_frags.lock);
 	return fq;
 }
 
@@ -341,15 +231,13 @@
 		goto oom;
 	}
 
-	memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
-
 	fq->id = id;
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
-	spin_lock_init(&fq->lock);
-	atomic_set(&fq->refcnt, 1);
+	setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
+	spin_lock_init(&fq->q.lock);
+	atomic_set(&fq->q.refcnt, 1);
 
 	return nf_ct_frag6_intern(hash, fq);
 
@@ -364,17 +252,17 @@
 	struct hlist_node *n;
 	unsigned int hash = ip6qhashfn(id, src, dst);
 
-	read_lock(&nf_ct_frag6_lock);
-	hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+	read_lock(&nf_frags.lock);
+	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
 		if (fq->id == id &&
 		    ipv6_addr_equal(src, &fq->saddr) &&
 		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			read_unlock(&nf_ct_frag6_lock);
+			atomic_inc(&fq->q.refcnt);
+			read_unlock(&nf_frags.lock);
 			return fq;
 		}
 	}
-	read_unlock(&nf_ct_frag6_lock);
+	read_unlock(&nf_frags.lock);
 
 	return nf_ct_frag6_create(hash, id, src, dst);
 }
@@ -386,7 +274,7 @@
 	struct sk_buff *prev, *next;
 	int offset, end;
 
-	if (fq->last_in & COMPLETE) {
+	if (fq->q.last_in & COMPLETE) {
 		pr_debug("Allready completed\n");
 		goto err;
 	}
@@ -412,13 +300,13 @@
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrupted.
 		 */
-		if (end < fq->len ||
-		    ((fq->last_in & LAST_IN) && end != fq->len)) {
+		if (end < fq->q.len ||
+		    ((fq->q.last_in & LAST_IN) && end != fq->q.len)) {
 			pr_debug("already received last fragment\n");
 			goto err;
 		}
-		fq->last_in |= LAST_IN;
-		fq->len = end;
+		fq->q.last_in |= LAST_IN;
+		fq->q.len = end;
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -430,13 +318,13 @@
 			pr_debug("end of fragment not rounded to 8 bytes.\n");
 			return -1;
 		}
-		if (end > fq->len) {
+		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (fq->last_in & LAST_IN) {
+			if (fq->q.last_in & LAST_IN) {
 				pr_debug("last packet already reached.\n");
 				goto err;
 			}
-			fq->len = end;
+			fq->q.len = end;
 		}
 	}
 
@@ -458,7 +346,7 @@
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for (next = fq->fragments; next != NULL; next = next->next) {
+	for (next = fq->q.fragments; next != NULL; next = next->next) {
 		if (NFCT_FRAG6_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -503,7 +391,7 @@
 
 			/* next fragment */
 			NFCT_FRAG6_CB(next)->offset += i;
-			fq->meat -= i;
+			fq->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -518,9 +406,9 @@
 			if (prev)
 				prev->next = next;
 			else
-				fq->fragments = next;
+				fq->q.fragments = next;
 
-			fq->meat -= free_it->len;
+			fq->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -532,23 +420,23 @@
 	if (prev)
 		prev->next = skb;
 	else
-		fq->fragments = skb;
+		fq->q.fragments = skb;
 
 	skb->dev = NULL;
-	fq->stamp = skb->tstamp;
-	fq->meat += skb->len;
-	atomic_add(skb->truesize, &nf_ct_frag6_mem);
+	fq->q.stamp = skb->tstamp;
+	fq->q.meat += skb->len;
+	atomic_add(skb->truesize, &nf_frags.mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
 	 */
 	if (offset == 0) {
 		fq->nhoffset = nhoff;
-		fq->last_in |= FIRST_IN;
+		fq->q.last_in |= FIRST_IN;
 	}
-	write_lock(&nf_ct_frag6_lock);
-	list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
-	write_unlock(&nf_ct_frag6_lock);
+	write_lock(&nf_frags.lock);
+	list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+	write_unlock(&nf_frags.lock);
 	return 0;
 
 err:
@@ -567,7 +455,7 @@
 static struct sk_buff *
 nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 {
-	struct sk_buff *fp, *op, *head = fq->fragments;
+	struct sk_buff *fp, *op, *head = fq->q.fragments;
 	int    payload_len;
 
 	fq_kill(fq);
@@ -577,7 +465,7 @@
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
-		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
 		pr_debug("payload len is too large.\n");
@@ -614,7 +502,7 @@
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		atomic_add(clone->truesize, &nf_ct_frag6_mem);
+		atomic_add(clone->truesize, &nf_frags.mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -628,7 +516,7 @@
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &nf_ct_frag6_mem);
+	atomic_sub(head->truesize, &nf_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -638,12 +526,12 @@
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
+		atomic_sub(fp->truesize, &nf_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = fq->stamp;
+	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
@@ -652,7 +540,7 @@
 					  skb_network_header_len(head),
 					  head->csum);
 
-	fq->fragments = NULL;
+	fq->q.fragments = NULL;
 
 	/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
 	fp = skb_shinfo(head)->frag_list;
@@ -788,7 +676,7 @@
 		goto ret_orig;
 	}
 
-	if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
+	if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
 		nf_ct_frag6_evictor();
 
 	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -797,23 +685,23 @@
 		goto ret_orig;
 	}
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
 	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
-		spin_unlock(&fq->lock);
+		spin_unlock(&fq->q.lock);
 		pr_debug("Can't insert skb to queue\n");
-		fq_put(fq, NULL);
+		fq_put(fq);
 		goto ret_orig;
 	}
 
-	if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
+	if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) {
 		ret_skb = nf_ct_frag6_reasm(fq, dev);
 		if (ret_skb == NULL)
 			pr_debug("Can't reassemble fragmented packets\n");
 	}
-	spin_unlock(&fq->lock);
+	spin_unlock(&fq->q.lock);
 
-	fq_put(fq, NULL);
+	fq_put(fq);
 	return ret_skb;
 
 ret_orig:
@@ -859,20 +747,20 @@
 
 int nf_ct_frag6_init(void)
 {
-	nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				   (jiffies ^ (jiffies >> 6)));
-
-	setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
-	nf_ct_frag6_secret_timer.expires = jiffies
-					   + nf_ct_frag6_secret_interval;
-	add_timer(&nf_ct_frag6_secret_timer);
+	nf_frags.ctl = &nf_frags_ctl;
+	nf_frags.hashfn = nf_hashfn;
+	nf_frags.destructor = nf_frag_free;
+	nf_frags.skb_free = nf_skb_free;
+	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+	inet_frags_init(&nf_frags);
 
 	return 0;
 }
 
 void nf_ct_frag6_cleanup(void)
 {
-	del_timer(&nf_ct_frag6_secret_timer);
-	nf_ct_frag6_low_thresh = 0;
+	inet_frags_fini(&nf_frags);
+
+	nf_frags_ctl.low_thresh = 0;
 	nf_ct_frag6_evictor();
 }
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index db94501..be526ad 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -54,7 +54,7 @@
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       fold_prot_inuse(&rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
+		       ip6_frag_nqueues(), ip6_frag_mem());
 	return 0;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 31601c9..6ad19cf 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -42,6 +42,7 @@
 #include <linux/icmpv6.h>
 #include <linux/random.h>
 #include <linux/jhash.h>
+#include <linux/skbuff.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -53,11 +54,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
-
-int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
-int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
-
-int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
+#include <net/inet_frag.h>
 
 struct ip6frag_skb_cb
 {
@@ -74,53 +71,39 @@
 
 struct frag_queue
 {
-	struct hlist_node	list;
-	struct list_head lru_list;		/* lru list member	*/
+	struct inet_frag_queue	q;
 
 	__be32			id;		/* fragment id		*/
 	struct in6_addr		saddr;
 	struct in6_addr		daddr;
 
-	spinlock_t		lock;
-	atomic_t		refcnt;
-	struct timer_list	timer;		/* expire timer		*/
-	struct sk_buff		*fragments;
-	int			len;
-	int			meat;
 	int			iif;
-	ktime_t			stamp;
 	unsigned int		csum;
-	__u8			last_in;	/* has first/last segment arrived? */
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
 	__u16			nhoffset;
 };
 
-/* Hash table. */
+struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
+	.high_thresh 	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
+	.timeout	 = IPV6_FRAG_TIMEOUT,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-#define IP6Q_HASHSZ	64
+static struct inet_frags ip6_frags;
 
-static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ];
-static DEFINE_RWLOCK(ip6_frag_lock);
-static u32 ip6_frag_hash_rnd;
-static LIST_HEAD(ip6_frag_lru_list);
-int ip6_frag_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct frag_queue *fq)
+int ip6_frag_nqueues(void)
 {
-	hlist_del(&fq->list);
-	list_del(&fq->lru_list);
-	ip6_frag_nqueues--;
+	return ip6_frags.nqueues;
 }
 
-static __inline__ void fq_unlink(struct frag_queue *fq)
+int ip6_frag_mem(void)
 {
-	write_lock(&ip6_frag_lock);
-	__fq_unlink(fq);
-	write_unlock(&ip6_frag_lock);
+	return atomic_read(&ip6_frags.mem);
 }
 
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+			  struct net_device *dev);
+
 /*
  * callers should be careful not to use the hash value outside the ipfrag_lock
  * as doing so could race with ipfrag_hash_rnd being recalculated.
@@ -136,7 +119,7 @@
 
 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frag_hash_rnd;
+	c += ip6_frags.rnd;
 	__jhash_mix(a, b, c);
 
 	a += (__force u32)saddr->s6_addr32[3];
@@ -149,60 +132,29 @@
 	c += (__force u32)id;
 	__jhash_mix(a, b, c);
 
-	return c & (IP6Q_HASHSZ - 1);
+	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ip6_frag_secret_rebuild(unsigned long dummy)
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
+	struct frag_queue *fq;
 
-	write_lock(&ip6_frag_lock);
-	get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IP6Q_HASHSZ; i++) {
-		struct frag_queue *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) {
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-
-			if (hval != i) {
-				hlist_del(&q->list);
-
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list,
-					       &ip6_frag_hash[hval]);
-
-			}
-		}
-	}
-	write_unlock(&ip6_frag_lock);
-
-	mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
+	fq = container_of(q, struct frag_queue, q);
+	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
 }
 
-atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip6_frag_mem);
+	atomic_sub(skb->truesize, &ip6_frags.mem);
 	kfree_skb(skb);
 }
 
-static inline void frag_free_queue(struct frag_queue *fq, int *work)
+static void ip6_frag_free(struct inet_frag_queue *fq)
 {
-	if (work)
-		*work -= sizeof(struct frag_queue);
-	atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
-	kfree(fq);
+	kfree(container_of(fq, struct frag_queue, q));
 }
 
 static inline struct frag_queue *frag_alloc_queue(void)
@@ -211,36 +163,15 @@
 
 	if(!fq)
 		return NULL;
-	atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
+	atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
 	return fq;
 }
 
 /* Destruction primitives. */
 
-/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq, int *work)
+static __inline__ void fq_put(struct frag_queue *fq)
 {
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct frag_queue *fq, int *work)
-{
-	if (atomic_dec_and_test(&fq->refcnt))
-		ip6_frag_destroy(fq, work);
+	inet_frag_put(&fq->q, &ip6_frags);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -248,45 +179,16 @@
  */
 static __inline__ void fq_kill(struct frag_queue *fq)
 {
-	if (del_timer(&fq->timer))
-		atomic_dec(&fq->refcnt);
-
-	if (!(fq->last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->refcnt);
-		fq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&fq->q, &ip6_frags);
 }
 
 static void ip6_evictor(struct inet6_dev *idev)
 {
-	struct frag_queue *fq;
-	struct list_head *tmp;
-	int work;
+	int evicted;
 
-	work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while(work > 0) {
-		read_lock(&ip6_frag_lock);
-		if (list_empty(&ip6_frag_lru_list)) {
-			read_unlock(&ip6_frag_lock);
-			return;
-		}
-		tmp = ip6_frag_lru_list.next;
-		fq = list_entry(tmp, struct frag_queue, lru_list);
-		atomic_inc(&fq->refcnt);
-		read_unlock(&ip6_frag_lock);
-
-		spin_lock(&fq->lock);
-		if (!(fq->last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->lock);
-
-		fq_put(fq, &work);
-		IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip6_frags);
+	if (evicted)
+		IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 static void ip6_frag_expire(unsigned long data)
@@ -294,9 +196,9 @@
 	struct frag_queue *fq = (struct frag_queue *) data;
 	struct net_device *dev = NULL;
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & COMPLETE)
 		goto out;
 
 	fq_kill(fq);
@@ -311,7 +213,7 @@
 	rcu_read_unlock();
 
 	/* Don't send error if the first segment did not arrive. */
-	if (!(fq->last_in&FIRST_IN) || !fq->fragments)
+	if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments)
 		goto out;
 
 	/*
@@ -319,13 +221,13 @@
 	   segment was received. And do not use fq->dev
 	   pointer directly, device might already disappeared.
 	 */
-	fq->fragments->dev = dev;
-	icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+	fq->q.fragments->dev = dev;
+	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
 out:
 	if (dev)
 		dev_put(dev);
-	spin_unlock(&fq->lock);
-	fq_put(fq, NULL);
+	spin_unlock(&fq->q.lock);
+	fq_put(fq);
 }
 
 /* Creation primitives. */
@@ -339,32 +241,32 @@
 	struct hlist_node *n;
 #endif
 
-	write_lock(&ip6_frag_lock);
+	write_lock(&ip6_frags.lock);
 	hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
 #ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
 		if (fq->id == fq_in->id &&
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
 		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			write_unlock(&ip6_frag_lock);
-			fq_in->last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
+			atomic_inc(&fq->q.refcnt);
+			write_unlock(&ip6_frags.lock);
+			fq_in->q.last_in |= COMPLETE;
+			fq_put(fq_in);
 			return fq;
 		}
 	}
 #endif
 	fq = fq_in;
 
-	if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
-		atomic_inc(&fq->refcnt);
+	if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
+		atomic_inc(&fq->q.refcnt);
 
-	atomic_inc(&fq->refcnt);
-	hlist_add_head(&fq->list, &ip6_frag_hash[hash]);
-	INIT_LIST_HEAD(&fq->lru_list);
-	list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
-	ip6_frag_nqueues++;
-	write_unlock(&ip6_frag_lock);
+	atomic_inc(&fq->q.refcnt);
+	hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
+	INIT_LIST_HEAD(&fq->q.lru_list);
+	list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+	ip6_frags.nqueues++;
+	write_unlock(&ip6_frags.lock);
 	return fq;
 }
 
@@ -382,11 +284,11 @@
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	init_timer(&fq->timer);
-	fq->timer.function = ip6_frag_expire;
-	fq->timer.data = (long) fq;
-	spin_lock_init(&fq->lock);
-	atomic_set(&fq->refcnt, 1);
+	init_timer(&fq->q.timer);
+	fq->q.timer.function = ip6_frag_expire;
+	fq->q.timer.data = (long) fq;
+	spin_lock_init(&fq->q.lock);
+	atomic_set(&fq->q.refcnt, 1);
 
 	return ip6_frag_intern(fq);
 
@@ -403,30 +305,31 @@
 	struct hlist_node *n;
 	unsigned int hash;
 
-	read_lock(&ip6_frag_lock);
+	read_lock(&ip6_frags.lock);
 	hash = ip6qhashfn(id, src, dst);
-	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
 		if (fq->id == id &&
 		    ipv6_addr_equal(src, &fq->saddr) &&
 		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			read_unlock(&ip6_frag_lock);
+			atomic_inc(&fq->q.refcnt);
+			read_unlock(&ip6_frags.lock);
 			return fq;
 		}
 	}
-	read_unlock(&ip6_frag_lock);
+	read_unlock(&ip6_frags.lock);
 
 	return ip6_frag_create(id, src, dst, idev);
 }
 
 
-static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			   struct frag_hdr *fhdr, int nhoff)
 {
 	struct sk_buff *prev, *next;
+	struct net_device *dev;
 	int offset, end;
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & COMPLETE)
 		goto err;
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -439,7 +342,7 @@
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((u8 *)&fhdr->frag_off -
 				   skb_network_header(skb)));
-		return;
+		return -1;
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
@@ -454,11 +357,11 @@
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrupted.
 		 */
-		if (end < fq->len ||
-		    ((fq->last_in & LAST_IN) && end != fq->len))
+		if (end < fq->q.len ||
+		    ((fq->q.last_in & LAST_IN) && end != fq->q.len))
 			goto err;
-		fq->last_in |= LAST_IN;
-		fq->len = end;
+		fq->q.last_in |= LAST_IN;
+		fq->q.len = end;
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -471,13 +374,13 @@
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 					  offsetof(struct ipv6hdr, payload_len));
-			return;
+			return -1;
 		}
-		if (end > fq->len) {
+		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (fq->last_in & LAST_IN)
+			if (fq->q.last_in & LAST_IN)
 				goto err;
-			fq->len = end;
+			fq->q.len = end;
 		}
 	}
 
@@ -496,7 +399,7 @@
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for(next = fq->fragments; next != NULL; next = next->next) {
+	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -533,7 +436,7 @@
 			if (!pskb_pull(next, i))
 				goto err;
 			FRAG6_CB(next)->offset += i;	/* next fragment */
-			fq->meat -= i;
+			fq->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -548,9 +451,9 @@
 			if (prev)
 				prev->next = next;
 			else
-				fq->fragments = next;
+				fq->q.fragments = next;
 
-			fq->meat -= free_it->len;
+			fq->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -562,30 +465,37 @@
 	if (prev)
 		prev->next = skb;
 	else
-		fq->fragments = skb;
+		fq->q.fragments = skb;
 
-	if (skb->dev)
-		fq->iif = skb->dev->ifindex;
-	skb->dev = NULL;
-	fq->stamp = skb->tstamp;
-	fq->meat += skb->len;
-	atomic_add(skb->truesize, &ip6_frag_mem);
+	dev = skb->dev;
+	if (dev) {
+		fq->iif = dev->ifindex;
+		skb->dev = NULL;
+	}
+	fq->q.stamp = skb->tstamp;
+	fq->q.meat += skb->len;
+	atomic_add(skb->truesize, &ip6_frags.mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
 	 */
 	if (offset == 0) {
 		fq->nhoffset = nhoff;
-		fq->last_in |= FIRST_IN;
+		fq->q.last_in |= FIRST_IN;
 	}
-	write_lock(&ip6_frag_lock);
-	list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
-	write_unlock(&ip6_frag_lock);
-	return;
+
+	if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
+		return ip6_frag_reasm(fq, prev, dev);
+
+	write_lock(&ip6_frags.lock);
+	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+	write_unlock(&ip6_frags.lock);
+	return -1;
 
 err:
 	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
+	return -1;
 }
 
 /*
@@ -597,21 +507,39 @@
  *	queue is eligible for reassembly i.e. it is not COMPLETE,
  *	the last and the first frames arrived and all the bits are here.
  */
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev)
 {
-	struct sk_buff *fp, *head = fq->fragments;
+	struct sk_buff *fp, *head = fq->q.fragments;
 	int    payload_len;
 	unsigned int nhoff;
 
 	fq_kill(fq);
 
+	/* Make the one we just received the head. */
+	if (prev) {
+		head = prev->next;
+		fp = skb_clone(head, GFP_ATOMIC);
+
+		if (!fp)
+			goto out_oom;
+
+		fp->next = head->next;
+		prev->next = fp;
+
+		skb_morph(head, fq->q.fragments);
+		head->next = fq->q.fragments->next;
+
+		kfree_skb(fq->q.fragments);
+		fq->q.fragments = head;
+	}
+
 	BUG_TRAP(head != NULL);
 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
-		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
@@ -640,7 +568,7 @@
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip6_frag_mem);
+		atomic_add(clone->truesize, &ip6_frags.mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -655,7 +583,7 @@
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip6_frag_mem);
+	atomic_sub(head->truesize, &ip6_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -665,17 +593,15 @@
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip6_frag_mem);
+		atomic_sub(fp->truesize, &ip6_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = fq->stamp;
+	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
 	IP6CB(head)->nhoff = nhoff;
 
-	*skb_in = head;
-
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(skb_network_header(head),
@@ -685,7 +611,7 @@
 	rcu_read_lock();
 	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
-	fq->fragments = NULL;
+	fq->q.fragments = NULL;
 	return 1;
 
 out_oversize:
@@ -702,10 +628,8 @@
 	return -1;
 }
 
-static int ipv6_frag_rcv(struct sk_buff **skbp)
+static int ipv6_frag_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
-	struct net_device *dev = skb->dev;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -739,23 +663,19 @@
 		return 1;
 	}
 
-	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
 		ip6_evictor(ip6_dst_idev(skb->dst));
 
 	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
 			  ip6_dst_idev(skb->dst))) != NULL) {
-		int ret = -1;
+		int ret;
 
-		spin_lock(&fq->lock);
+		spin_lock(&fq->q.lock);
 
-		ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
-		if (fq->last_in == (FIRST_IN|LAST_IN) &&
-		    fq->meat == fq->len)
-			ret = ip6_frag_reasm(fq, skbp, dev);
-
-		spin_unlock(&fq->lock);
-		fq_put(fq, NULL);
+		spin_unlock(&fq->q.lock);
+		fq_put(fq);
 		return ret;
 	}
 
@@ -775,11 +695,10 @@
 	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
 		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
 
-	ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				   (jiffies ^ (jiffies >> 6)));
-
-	init_timer(&ip6_frag_secret_timer);
-	ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
-	ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
-	add_timer(&ip6_frag_secret_timer);
+	ip6_frags.ctl = &ip6_frags_ctl;
+	ip6_frags.hashfn = ip6_hashfn;
+	ip6_frags.destructor = ip6_frag_free;
+	ip6_frags.skb_free = NULL;
+	ip6_frags.qsize = sizeof(struct frag_queue);
+	inet_frags_init(&ip6_frags);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6ff19f9..cce9941 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -663,7 +663,7 @@
 	return rt;
 }
 
-static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
 					    struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
@@ -682,7 +682,7 @@
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(fn, fl->iif, strict | reachable);
+	rt = rt6_select(fn, oif, strict | reachable);
 	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -735,6 +735,12 @@
 	return rt;
 }
 
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+					    struct flowi *fl, int flags)
+{
+	return ip6_pol_route(table, fl->iif, fl, flags);
+}
+
 void ip6_route_input(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -761,72 +767,7 @@
 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 					     struct flowi *fl, int flags)
 {
-	struct fib6_node *fn;
-	struct rt6_info *rt, *nrt;
-	int strict = 0;
-	int attempts = 3;
-	int err;
-	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
-
-	strict |= flags & RT6_LOOKUP_F_IFACE;
-
-relookup:
-	read_lock_bh(&table->tb6_lock);
-
-restart_2:
-	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
-
-restart:
-	rt = rt6_select(fn, fl->oif, strict | reachable);
-	BACKTRACK(&fl->fl6_src);
-	if (rt == &ip6_null_entry ||
-	    rt->rt6i_flags & RTF_CACHE)
-		goto out;
-
-	dst_hold(&rt->u.dst);
-	read_unlock_bh(&table->tb6_lock);
-
-	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
-	else {
-#if CLONE_OFFLINK_ROUTE
-		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
-		goto out2;
-#endif
-	}
-
-	dst_release(&rt->u.dst);
-	rt = nrt ? : &ip6_null_entry;
-
-	dst_hold(&rt->u.dst);
-	if (nrt) {
-		err = ip6_ins_rt(nrt);
-		if (!err)
-			goto out2;
-	}
-
-	if (--attempts <= 0)
-		goto out2;
-
-	/*
-	 * Race condition! In the gap, when table->tb6_lock was
-	 * released someone could insert this route.  Relookup.
-	 */
-	dst_release(&rt->u.dst);
-	goto relookup;
-
-out:
-	if (reachable) {
-		reachable = 0;
-		goto restart_2;
-	}
-	dst_hold(&rt->u.dst);
-	read_unlock_bh(&table->tb6_lock);
-out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
-	return rt;
+	return ip6_pol_route(table, fl->oif, fl, flags);
 }
 
 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3fb4427..68bb254 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -12,6 +12,7 @@
 #include <net/ndisc.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
+#include <net/inet_frag.h>
 
 #ifdef CONFIG_SYSCTL
 
@@ -41,7 +42,7 @@
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
 		.procname	= "ip6frag_high_thresh",
-		.data		= &sysctl_ip6frag_high_thresh,
+		.data		= &ip6_frags_ctl.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -49,7 +50,7 @@
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
 		.procname	= "ip6frag_low_thresh",
-		.data		= &sysctl_ip6frag_low_thresh,
+		.data		= &ip6_frags_ctl.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -57,7 +58,7 @@
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
 		.procname	= "ip6frag_time",
-		.data		= &sysctl_ip6frag_time,
+		.data		= &ip6_frags_ctl.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -66,7 +67,7 @@
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
 		.procname	= "ip6frag_secret_interval",
-		.data		= &sysctl_ip6frag_secret_interval,
+		.data		= &ip6_frags_ctl.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a07b59c..737b755 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1668,9 +1668,8 @@
 	return 0;
 }
 
-static int tcp_v6_rcv(struct sk_buff **pskb)
+static int tcp_v6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct tcphdr *th;
 	struct sock *sk;
 	int ret;
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 23e2809..6323921 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -87,9 +87,8 @@
 
 EXPORT_SYMBOL(xfrm6_tunnel_deregister);
 
-static int tunnel6_rcv(struct sk_buff **pskb)
+static int tunnel6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct xfrm6_tunnel *handler;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -106,9 +105,8 @@
 	return 0;
 }
 
-static int tunnel46_rcv(struct sk_buff **pskb)
+static int tunnel46_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct xfrm6_tunnel *handler;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 82ff26d..caebad6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -405,10 +405,9 @@
 	return 0;
 }
 
-int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
+int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 		   int proto)
 {
-	struct sk_buff *skb = *pskb;
 	struct sock *sk;
 	struct udphdr *uh;
 	struct net_device *dev = skb->dev;
@@ -494,9 +493,9 @@
 	return 0;
 }
 
-static __inline__ int udpv6_rcv(struct sk_buff **pskb)
+static __inline__ int udpv6_rcv(struct sk_buff *skb)
 {
-	return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
+	return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP);
 }
 
 /*
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 6e252f3..2d3fda60 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -6,7 +6,7 @@
 #include <net/addrconf.h>
 #include <net/inet_common.h>
 
-extern int  	__udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
+extern int  	__udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
 extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
 			       int , int , int , __be32 , struct hlist_head []);
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f54016a..766566f 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,9 +17,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
 
-static int udplitev6_rcv(struct sk_buff **pskb)
+static int udplitev6_rcv(struct sk_buff *skb)
 {
-	return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
+	return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
 }
 
 static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index c858537..02f69e5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -133,9 +133,9 @@
 
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
-int xfrm6_rcv(struct sk_buff **pskb)
+int xfrm6_rcv(struct sk_buff *skb)
 {
-	return xfrm6_rcv_spi(*pskb, 0);
+	return xfrm6_rcv_spi(skb, 0);
 }
 
 EXPORT_SYMBOL(xfrm6_rcv);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4618c18..a5a32c1 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -80,7 +80,7 @@
 	while (likely((err = xfrm6_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
-		err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+		err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
 			      skb->dst->dev, dst_output);
 		if (unlikely(err != 1))
 			break;
@@ -88,7 +88,7 @@
 		if (!skb->dst->xfrm)
 			return dst_output(skb);
 
-		err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+		err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL,
 			      skb->dst->dev, xfrm6_output_finish2);
 		if (unlikely(err != 1))
 			break;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a523fa4..bed9ba0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -117,7 +117,7 @@
 EXPORT_SYMBOL(nf_unregister_hooks);
 
 unsigned int nf_iterate(struct list_head *head,
-			struct sk_buff **skb,
+			struct sk_buff *skb,
 			int hook,
 			const struct net_device *indev,
 			const struct net_device *outdev,
@@ -160,7 +160,7 @@
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev,
 		 struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *),
@@ -175,17 +175,17 @@
 
 	elem = &nf_hooks[pf][hook];
 next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+	verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
 		goto unlock;
 	} else if (verdict == NF_DROP) {
-		kfree_skb(*pskb);
+		kfree_skb(skb);
 		ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
+		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 	}
@@ -196,34 +196,24 @@
 EXPORT_SYMBOL(nf_hook_slow);
 
 
-int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
 {
-	struct sk_buff *nskb;
-
-	if (writable_len > (*pskb)->len)
+	if (writable_len > skb->len)
 		return 0;
 
 	/* Not exclusive use of packet?  Must copy. */
-	if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
-		goto copy_skb;
-	if (skb_shared(*pskb))
-		goto copy_skb;
+	if (!skb_cloned(skb)) {
+		if (writable_len <= skb_headlen(skb))
+			return 1;
+	} else if (skb_clone_writable(skb, writable_len))
+		return 1;
 
-	return pskb_may_pull(*pskb, writable_len);
+	if (writable_len <= skb_headlen(skb))
+		writable_len = 0;
+	else
+		writable_len -= skb_headlen(skb);
 
-copy_skb:
-	nskb = skb_copy(*pskb, GFP_ATOMIC);
-	if (!nskb)
-		return 0;
-	BUG_ON(skb_is_nonlinear(nskb));
-
-	/* Rest of kernel will get very unhappy if we pass it a
-	   suddenly-orphaned skbuff */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
-	return 1;
+	return !!__pskb_pull_tail(skb, writable_len);
 }
 EXPORT_SYMBOL(skb_make_writable);
 
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index e42ab23..7b8239c 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -36,7 +36,7 @@
 module_param(ts_algo, charp, 0400);
 MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
 
-unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
 				   enum ip_conntrack_info ctinfo,
 				   unsigned int matchoff,
 				   unsigned int matchlen,
@@ -79,7 +79,7 @@
 	},
 };
 
-static int amanda_help(struct sk_buff **pskb,
+static int amanda_help(struct sk_buff *skb,
 		       unsigned int protoff,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo)
@@ -101,25 +101,25 @@
 
 	/* increase the UDP timeout of the master connection as replies from
 	 * Amanda clients to the server can be quite delayed */
-	nf_ct_refresh(ct, *pskb, master_timeout * HZ);
+	nf_ct_refresh(ct, skb, master_timeout * HZ);
 
 	/* No data? */
 	dataoff = protoff + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len) {
+	if (dataoff >= skb->len) {
 		if (net_ratelimit())
-			printk("amanda_help: skblen = %u\n", (*pskb)->len);
+			printk("amanda_help: skblen = %u\n", skb->len);
 		return NF_ACCEPT;
 	}
 
 	memset(&ts, 0, sizeof(ts));
-	start = skb_find_text(*pskb, dataoff, (*pskb)->len,
+	start = skb_find_text(skb, dataoff, skb->len,
 			      search[SEARCH_CONNECT].ts, &ts);
 	if (start == UINT_MAX)
 		goto out;
 	start += dataoff + search[SEARCH_CONNECT].len;
 
 	memset(&ts, 0, sizeof(ts));
-	stop = skb_find_text(*pskb, start, (*pskb)->len,
+	stop = skb_find_text(skb, start, skb->len,
 			     search[SEARCH_NEWLINE].ts, &ts);
 	if (stop == UINT_MAX)
 		goto out;
@@ -127,13 +127,13 @@
 
 	for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
 		memset(&ts, 0, sizeof(ts));
-		off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
+		off = skb_find_text(skb, start, stop, search[i].ts, &ts);
 		if (off == UINT_MAX)
 			continue;
 		off += start + search[i].len;
 
 		len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
-		if (skb_copy_bits(*pskb, off, pbuf, len))
+		if (skb_copy_bits(skb, off, pbuf, len))
 			break;
 		pbuf[len] = '\0';
 
@@ -153,7 +153,7 @@
 
 		nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
 		if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
-			ret = nf_nat_amanda(pskb, ctinfo, off - dataoff,
+			ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
 					    len, exp);
 		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 83c30b4..4d6171b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -307,7 +307,7 @@
 
 /* Confirm a connection given skb; places it in hash table */
 int
-__nf_conntrack_confirm(struct sk_buff **pskb)
+__nf_conntrack_confirm(struct sk_buff *skb)
 {
 	unsigned int hash, repl_hash;
 	struct nf_conntrack_tuple_hash *h;
@@ -316,7 +316,7 @@
 	struct hlist_node *n;
 	enum ip_conntrack_info ctinfo;
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	/* ipt_REJECT uses nf_conntrack_attach to attach related
 	   ICMP/TCP RST packets in other direction.  Actual packet
@@ -367,14 +367,14 @@
 	write_unlock_bh(&nf_conntrack_lock);
 	help = nfct_help(ct);
 	if (help && help->helper)
-		nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+		nf_conntrack_event_cache(IPCT_HELPER, skb);
 #ifdef CONFIG_NF_NAT_NEEDED
 	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
 	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+		nf_conntrack_event_cache(IPCT_NATINFO, skb);
 #endif
 	nf_conntrack_event_cache(master_ct(ct) ?
-				 IPCT_RELATED : IPCT_NEW, *pskb);
+				 IPCT_RELATED : IPCT_NEW, skb);
 	return NF_ACCEPT;
 
 out:
@@ -632,7 +632,7 @@
 }
 
 unsigned int
-nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
+nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -644,14 +644,14 @@
 	int ret;
 
 	/* Previously seen (loopback or untracked)?  Ignore. */
-	if ((*pskb)->nfct) {
+	if (skb->nfct) {
 		NF_CT_STAT_INC_ATOMIC(ignore);
 		return NF_ACCEPT;
 	}
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
-	ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb),
+	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 				   &dataoff, &protonum);
 	if (ret <= 0) {
 		pr_debug("not prepared to track yet or error occured\n");
@@ -666,13 +666,13 @@
 	 * inverse of the return code tells to the netfilter
 	 * core what to do with the packet. */
 	if (l4proto->error != NULL &&
-	    (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+	    (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
 		NF_CT_STAT_INC_ATOMIC(error);
 		NF_CT_STAT_INC_ATOMIC(invalid);
 		return -ret;
 	}
 
-	ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto,
+	ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
 			       &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
@@ -686,21 +686,21 @@
 		return NF_DROP;
 	}
 
-	NF_CT_ASSERT((*pskb)->nfct);
+	NF_CT_ASSERT(skb->nfct);
 
-	ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
 	if (ret < 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
 		pr_debug("nf_conntrack_in: Can't track with proto module\n");
-		nf_conntrack_put((*pskb)->nfct);
-		(*pskb)->nfct = NULL;
+		nf_conntrack_put(skb->nfct);
+		skb->nfct = NULL;
 		NF_CT_STAT_INC_ATOMIC(invalid);
 		return -ret;
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_STATUS, *pskb);
+		nf_conntrack_event_cache(IPCT_STATUS, skb);
 
 	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index c763ee7..6df2590 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -43,7 +43,7 @@
 static int loose;
 module_param(loose, bool, 0600);
 
-unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				enum nf_ct_ftp_type type,
 				unsigned int matchoff,
@@ -344,7 +344,7 @@
 	}
 }
 
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
 		unsigned int protoff,
 		struct nf_conn *ct,
 		enum ip_conntrack_info ctinfo)
@@ -371,21 +371,21 @@
 		return NF_ACCEPT;
 	}
 
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	dataoff = protoff + th->doff * 4;
 	/* No data? */
-	if (dataoff >= (*pskb)->len) {
+	if (dataoff >= skb->len) {
 		pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
-			 (*pskb)->len);
+			 skb->len);
 		return NF_ACCEPT;
 	}
-	datalen = (*pskb)->len - dataoff;
+	datalen = skb->len - dataoff;
 
 	spin_lock_bh(&nf_ftp_lock);
-	fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
+	fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
 	BUG_ON(fb_ptr == NULL);
 
 	ends_in_nl = (fb_ptr[datalen - 1] == '\n');
@@ -491,7 +491,7 @@
 	 * (possibly changed) expectation itself. */
 	nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
 	if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
+		ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
 				 matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
@@ -508,7 +508,7 @@
 	/* Now if this ends in \n, update ftp info.  Seq may have been
 	 * adjusted by NAT code. */
 	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info, dir, *pskb);
+		update_nl_seq(seq, ct_ftp_info, dir, skb);
  out:
 	spin_unlock_bh(&nf_ftp_lock);
 	return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a8a9dfb..f23fd95 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -47,27 +47,27 @@
 				     "(determined by routing information)");
 
 /* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff **pskb,
+int (*set_h245_addr_hook) (struct sk_buff *skb,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr,
 			   union nf_conntrack_address *addr, __be16 port)
 			   __read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff **pskb,
+int (*set_h225_addr_hook) (struct sk_buff *skb,
 			   unsigned char **data, int dataoff,
 			   TransportAddress *taddr,
 			   union nf_conntrack_address *addr, __be16 port)
 			   __read_mostly;
-int (*set_sig_addr_hook) (struct sk_buff **pskb,
+int (*set_sig_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  unsigned char **data,
 			  TransportAddress *taddr, int count) __read_mostly;
-int (*set_ras_addr_hook) (struct sk_buff **pskb,
+int (*set_ras_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  unsigned char **data,
 			  TransportAddress *taddr, int count) __read_mostly;
-int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  unsigned char **data, int dataoff,
@@ -75,25 +75,25 @@
 			  __be16 port, __be16 rtp_port,
 			  struct nf_conntrack_expect *rtp_exp,
 			  struct nf_conntrack_expect *rtcp_exp) __read_mostly;
-int (*nat_t120_hook) (struct sk_buff **pskb,
+int (*nat_t120_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
 		      unsigned char **data, int dataoff,
 		      H245_TransportAddress *taddr, __be16 port,
 		      struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_h245_hook) (struct sk_buff **pskb,
+int (*nat_h245_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
 		      unsigned char **data, int dataoff,
 		      TransportAddress *taddr, __be16 port,
 		      struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+int (*nat_callforwarding_hook) (struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
 				unsigned char **data, int dataoff,
 				TransportAddress *taddr, __be16 port,
 				struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_q931_hook) (struct sk_buff **pskb,
+int (*nat_q931_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
 		      unsigned char **data, TransportAddress *taddr, int idx,
@@ -108,7 +108,7 @@
 static struct nf_conntrack_helper nf_conntrack_helper_ras[];
 
 /****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
+static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 			 struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			 unsigned char **data, int *datalen, int *dataoff)
 {
@@ -122,7 +122,7 @@
 	int tpktoff;
 
 	/* Get TCP header */
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 
@@ -130,13 +130,13 @@
 	tcpdataoff = protoff + th->doff * 4;
 
 	/* Get TCP data length */
-	tcpdatalen = (*pskb)->len - tcpdataoff;
+	tcpdatalen = skb->len - tcpdataoff;
 	if (tcpdatalen <= 0)	/* No TCP data */
 		goto clear_out;
 
 	if (*data == NULL) {	/* first TPKT */
 		/* Get first TPKT pointer */
-		tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
+		tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
 					  h323_buffer);
 		BUG_ON(tpkt == NULL);
 
@@ -248,7 +248,7 @@
 }
 
 /****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr)
@@ -297,7 +297,7 @@
 		   (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
 		   ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+		ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -321,7 +321,7 @@
 }
 
 /****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
+static int expect_t120(struct sk_buff *skb,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, int dataoff,
@@ -355,7 +355,7 @@
 	    (nat_t120 = rcu_dereference(nat_t120_hook)) &&
 	    ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr,
+		ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -371,7 +371,7 @@
 }
 
 /****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
+static int process_h245_channel(struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
 				unsigned char **data, int dataoff,
@@ -381,7 +381,7 @@
 
 	if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
 		/* RTP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 				      &channel->mediaChannel);
 		if (ret < 0)
 			return -1;
@@ -390,7 +390,7 @@
 	if (channel->
 	    options & eH2250LogicalChannelParameters_mediaControlChannel) {
 		/* RTCP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 				      &channel->mediaControlChannel);
 		if (ret < 0)
 			return -1;
@@ -400,7 +400,7 @@
 }
 
 /****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, int dataoff,
 		       OpenLogicalChannel *olc)
@@ -412,7 +412,7 @@
 	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
 	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
 	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+		ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
 					   &olc->
 					   forwardLogicalChannelParameters.
 					   multiplexParameters.
@@ -430,7 +430,7 @@
 		eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
 	{
 		ret =
-		    process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+		    process_h245_channel(skb, ct, ctinfo, data, dataoff,
 					 &olc->
 					 reverseLogicalChannelParameters.
 					 multiplexParameters.
@@ -448,7 +448,7 @@
 	    t120.choice == eDataProtocolCapability_separateLANStack &&
 	    olc->separateStack.networkAddress.choice ==
 	    eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_t120(skb, ct, ctinfo, data, dataoff,
 				  &olc->separateStack.networkAddress.
 				  localAreaAddress);
 		if (ret < 0)
@@ -459,7 +459,7 @@
 }
 
 /****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff,
 			OpenLogicalChannelAck *olca)
@@ -477,7 +477,7 @@
 		choice ==
 		eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
 	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+		ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
 					   &olca->
 					   reverseLogicalChannelParameters.
 					   multiplexParameters.
@@ -496,7 +496,7 @@
 		if (ack->options &
 		    eH2250LogicalChannelAckParameters_mediaChannel) {
 			/* RTP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+			ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 					      &ack->mediaChannel);
 			if (ret < 0)
 				return -1;
@@ -505,7 +505,7 @@
 		if (ack->options &
 		    eH2250LogicalChannelAckParameters_mediaControlChannel) {
 			/* RTCP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+			ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 					      &ack->mediaControlChannel);
 			if (ret < 0)
 				return -1;
@@ -515,7 +515,7 @@
 	if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
 		olca->separateStack.networkAddress.choice ==
 		eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_t120(skb, ct, ctinfo, data, dataoff,
 				  &olca->separateStack.networkAddress.
 				  localAreaAddress);
 		if (ret < 0)
@@ -526,7 +526,7 @@
 }
 
 /****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff,
 			MultimediaSystemControlMessage *mscm)
@@ -535,7 +535,7 @@
 	case eMultimediaSystemControlMessage_request:
 		if (mscm->request.choice ==
 		    eRequestMessage_openLogicalChannel) {
-			return process_olc(pskb, ct, ctinfo, data, dataoff,
+			return process_olc(skb, ct, ctinfo, data, dataoff,
 					   &mscm->request.openLogicalChannel);
 		}
 		pr_debug("nf_ct_h323: H.245 Request %d\n",
@@ -544,7 +544,7 @@
 	case eMultimediaSystemControlMessage_response:
 		if (mscm->response.choice ==
 		    eResponseMessage_openLogicalChannelAck) {
-			return process_olca(pskb, ct, ctinfo, data, dataoff,
+			return process_olca(skb, ct, ctinfo, data, dataoff,
 					    &mscm->response.
 					    openLogicalChannelAck);
 		}
@@ -560,7 +560,7 @@
 }
 
 /****************************************************************************/
-static int h245_help(struct sk_buff **pskb, unsigned int protoff,
+static int h245_help(struct sk_buff *skb, unsigned int protoff,
 		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	static MultimediaSystemControlMessage mscm;
@@ -574,12 +574,12 @@
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
 		return NF_ACCEPT;
 	}
-	pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Process each TPKT */
-	while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+	while (get_tpkt_data(skb, protoff, ct, ctinfo,
 			     &data, &datalen, &dataoff)) {
 		pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -596,7 +596,7 @@
 		}
 
 		/* Process H.245 signal */
-		if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+		if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0)
 			goto drop;
 	}
 
@@ -654,7 +654,7 @@
 }
 
 /****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, int dataoff,
 		       TransportAddress *taddr)
@@ -687,7 +687,7 @@
 	    (nat_h245 = rcu_dereference(nat_h245_hook)) &&
 	    ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr,
+		ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -758,7 +758,7 @@
 }
 
 /****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
+static int expect_callforwarding(struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
 				 unsigned char **data, int dataoff,
@@ -798,7 +798,7 @@
 	    (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
 	    ct->status & IPS_NAT_MASK) {
 		/* Need NAT */
-		ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
+		ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -814,7 +814,7 @@
 }
 
 /****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned char **data, int dataoff,
 			 Setup_UUIE *setup)
@@ -829,7 +829,7 @@
 	pr_debug("nf_ct_q931: Setup\n");
 
 	if (setup->options & eSetup_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &setup->h245Address);
 		if (ret < 0)
 			return -1;
@@ -846,7 +846,7 @@
 			 NIP6(*(struct in6_addr *)&addr), ntohs(port),
 			 NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
 			 ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
+		ret = set_h225_addr(skb, data, dataoff,
 				    &setup->destCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.src.u3,
 				    ct->tuplehash[!dir].tuple.src.u.tcp.port);
@@ -864,7 +864,7 @@
 			 NIP6(*(struct in6_addr *)&addr), ntohs(port),
 			 NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
 			 ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
+		ret = set_h225_addr(skb, data, dataoff,
 				    &setup->sourceCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.dst.u3,
 				    ct->tuplehash[!dir].tuple.dst.u.tcp.port);
@@ -874,7 +874,7 @@
 
 	if (setup->options & eSetup_UUIE_fastStart) {
 		for (i = 0; i < setup->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &setup->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -885,7 +885,7 @@
 }
 
 /****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
+static int process_callproceeding(struct sk_buff *skb,
 				  struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned char **data, int dataoff,
@@ -897,7 +897,7 @@
 	pr_debug("nf_ct_q931: CallProceeding\n");
 
 	if (callproc->options & eCallProceeding_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &callproc->h245Address);
 		if (ret < 0)
 			return -1;
@@ -905,7 +905,7 @@
 
 	if (callproc->options & eCallProceeding_UUIE_fastStart) {
 		for (i = 0; i < callproc->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &callproc->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -916,7 +916,7 @@
 }
 
 /****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned char **data, int dataoff,
 			   Connect_UUIE *connect)
@@ -927,7 +927,7 @@
 	pr_debug("nf_ct_q931: Connect\n");
 
 	if (connect->options & eConnect_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &connect->h245Address);
 		if (ret < 0)
 			return -1;
@@ -935,7 +935,7 @@
 
 	if (connect->options & eConnect_UUIE_fastStart) {
 		for (i = 0; i < connect->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &connect->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -946,7 +946,7 @@
 }
 
 /****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned char **data, int dataoff,
 			    Alerting_UUIE *alert)
@@ -957,7 +957,7 @@
 	pr_debug("nf_ct_q931: Alerting\n");
 
 	if (alert->options & eAlerting_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &alert->h245Address);
 		if (ret < 0)
 			return -1;
@@ -965,7 +965,7 @@
 
 	if (alert->options & eAlerting_UUIE_fastStart) {
 		for (i = 0; i < alert->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &alert->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -976,7 +976,7 @@
 }
 
 /****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned char **data, int dataoff,
 			    Facility_UUIE *facility)
@@ -988,7 +988,7 @@
 
 	if (facility->reason.choice == eFacilityReason_callForwarded) {
 		if (facility->options & eFacility_UUIE_alternativeAddress)
-			return expect_callforwarding(pskb, ct, ctinfo, data,
+			return expect_callforwarding(skb, ct, ctinfo, data,
 						     dataoff,
 						     &facility->
 						     alternativeAddress);
@@ -996,7 +996,7 @@
 	}
 
 	if (facility->options & eFacility_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &facility->h245Address);
 		if (ret < 0)
 			return -1;
@@ -1004,7 +1004,7 @@
 
 	if (facility->options & eFacility_UUIE_fastStart) {
 		for (i = 0; i < facility->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &facility->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1015,7 +1015,7 @@
 }
 
 /****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned char **data, int dataoff,
 			    Progress_UUIE *progress)
@@ -1026,7 +1026,7 @@
 	pr_debug("nf_ct_q931: Progress\n");
 
 	if (progress->options & eProgress_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &progress->h245Address);
 		if (ret < 0)
 			return -1;
@@ -1034,7 +1034,7 @@
 
 	if (progress->options & eProgress_UUIE_fastStart) {
 		for (i = 0; i < progress->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &progress->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1045,7 +1045,7 @@
 }
 
 /****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff, Q931 *q931)
 {
@@ -1055,28 +1055,28 @@
 
 	switch (pdu->h323_message_body.choice) {
 	case eH323_UU_PDU_h323_message_body_setup:
-		ret = process_setup(pskb, ct, ctinfo, data, dataoff,
+		ret = process_setup(skb, ct, ctinfo, data, dataoff,
 				    &pdu->h323_message_body.setup);
 		break;
 	case eH323_UU_PDU_h323_message_body_callProceeding:
-		ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
+		ret = process_callproceeding(skb, ct, ctinfo, data, dataoff,
 					     &pdu->h323_message_body.
 					     callProceeding);
 		break;
 	case eH323_UU_PDU_h323_message_body_connect:
-		ret = process_connect(pskb, ct, ctinfo, data, dataoff,
+		ret = process_connect(skb, ct, ctinfo, data, dataoff,
 				      &pdu->h323_message_body.connect);
 		break;
 	case eH323_UU_PDU_h323_message_body_alerting:
-		ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
+		ret = process_alerting(skb, ct, ctinfo, data, dataoff,
 				       &pdu->h323_message_body.alerting);
 		break;
 	case eH323_UU_PDU_h323_message_body_facility:
-		ret = process_facility(pskb, ct, ctinfo, data, dataoff,
+		ret = process_facility(skb, ct, ctinfo, data, dataoff,
 				       &pdu->h323_message_body.facility);
 		break;
 	case eH323_UU_PDU_h323_message_body_progress:
-		ret = process_progress(pskb, ct, ctinfo, data, dataoff,
+		ret = process_progress(skb, ct, ctinfo, data, dataoff,
 				       &pdu->h323_message_body.progress);
 		break;
 	default:
@@ -1090,7 +1090,7 @@
 
 	if (pdu->options & eH323_UU_PDU_h245Control) {
 		for (i = 0; i < pdu->h245Control.count; i++) {
-			ret = process_h245(pskb, ct, ctinfo, data, dataoff,
+			ret = process_h245(skb, ct, ctinfo, data, dataoff,
 					   &pdu->h245Control.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1101,7 +1101,7 @@
 }
 
 /****************************************************************************/
-static int q931_help(struct sk_buff **pskb, unsigned int protoff,
+static int q931_help(struct sk_buff *skb, unsigned int protoff,
 		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	static Q931 q931;
@@ -1115,12 +1115,12 @@
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
 		return NF_ACCEPT;
 	}
-	pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Process each TPKT */
-	while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+	while (get_tpkt_data(skb, protoff, ct, ctinfo,
 			     &data, &datalen, &dataoff)) {
 		pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -1136,7 +1136,7 @@
 		}
 
 		/* Process Q.931 signal */
-		if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
+		if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0)
 			goto drop;
 	}
 
@@ -1177,20 +1177,20 @@
 };
 
 /****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff,
+static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
 				   int *datalen)
 {
 	struct udphdr _uh, *uh;
 	int dataoff;
 
-	uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh);
+	uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh);
 	if (uh == NULL)
 		return NULL;
 	dataoff = protoff + sizeof(_uh);
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NULL;
-	*datalen = (*pskb)->len - dataoff;
-	return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
+	*datalen = skb->len - dataoff;
+	return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
 }
 
 /****************************************************************************/
@@ -1227,7 +1227,7 @@
 }
 
 /****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data,
 		       TransportAddress *taddr, int count)
@@ -1265,7 +1265,7 @@
 
 	nat_q931 = rcu_dereference(nat_q931_hook);
 	if (nat_q931 && ct->status & IPS_NAT_MASK) {	/* Need NAT */
-		ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
+		ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
 			pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1283,7 +1283,7 @@
 }
 
 /****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, GatekeeperRequest *grq)
 {
@@ -1293,13 +1293,13 @@
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK)	/* NATed */
-		return set_ras_addr(pskb, ct, ctinfo, data,
+		return set_ras_addr(skb, ct, ctinfo, data,
 				    &grq->rasAddress, 1);
 	return 0;
 }
 
 /****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, GatekeeperConfirm *gcf)
 {
@@ -1343,7 +1343,7 @@
 }
 
 /****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, RegistrationRequest *rrq)
 {
@@ -1353,7 +1353,7 @@
 
 	pr_debug("nf_ct_ras: RRQ\n");
 
-	ret = expect_q931(pskb, ct, ctinfo, data,
+	ret = expect_q931(skb, ct, ctinfo, data,
 			  rrq->callSignalAddress.item,
 			  rrq->callSignalAddress.count);
 	if (ret < 0)
@@ -1361,7 +1361,7 @@
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
+		ret = set_ras_addr(skb, ct, ctinfo, data,
 				   rrq->rasAddress.item,
 				   rrq->rasAddress.count);
 		if (ret < 0)
@@ -1378,7 +1378,7 @@
 }
 
 /****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, RegistrationConfirm *rcf)
 {
@@ -1392,7 +1392,7 @@
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, data,
 					rcf->callSignalAddress.item,
 					rcf->callSignalAddress.count);
 		if (ret < 0)
@@ -1407,7 +1407,7 @@
 	if (info->timeout > 0) {
 		pr_debug("nf_ct_ras: set RAS connection timeout to "
 			 "%u seconds\n", info->timeout);
-		nf_ct_refresh(ct, *pskb, info->timeout * HZ);
+		nf_ct_refresh(ct, skb, info->timeout * HZ);
 
 		/* Set expect timeout */
 		read_lock_bh(&nf_conntrack_lock);
@@ -1427,7 +1427,7 @@
 }
 
 /****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, UnregistrationRequest *urq)
 {
@@ -1440,7 +1440,7 @@
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, data,
 				   urq->callSignalAddress.item,
 				   urq->callSignalAddress.count);
 		if (ret < 0)
@@ -1453,13 +1453,13 @@
 	info->sig_port[!dir] = 0;
 
 	/* Give it 30 seconds for UCF or URJ */
-	nf_ct_refresh(ct, *pskb, 30 * HZ);
+	nf_ct_refresh(ct, skb, 30 * HZ);
 
 	return 0;
 }
 
 /****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, AdmissionRequest *arq)
 {
@@ -1479,7 +1479,7 @@
 	    port == info->sig_port[dir] &&
 	    set_h225_addr && ct->status & IPS_NAT_MASK) {
 		/* Answering ARQ */
-		return set_h225_addr(pskb, data, 0,
+		return set_h225_addr(skb, data, 0,
 				     &arq->destCallSignalAddress,
 				     &ct->tuplehash[!dir].tuple.dst.u3,
 				     info->sig_port[!dir]);
@@ -1491,7 +1491,7 @@
 	    !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
 	    set_h225_addr && ct->status & IPS_NAT_MASK) {
 		/* Calling ARQ */
-		return set_h225_addr(pskb, data, 0,
+		return set_h225_addr(skb, data, 0,
 				     &arq->srcCallSignalAddress,
 				     &ct->tuplehash[!dir].tuple.dst.u3,
 				     port);
@@ -1501,7 +1501,7 @@
 }
 
 /****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, AdmissionConfirm *acf)
 {
@@ -1522,7 +1522,7 @@
 		/* Answering ACF */
 		set_sig_addr = rcu_dereference(set_sig_addr_hook);
 		if (set_sig_addr && ct->status & IPS_NAT_MASK)
-			return set_sig_addr(pskb, ct, ctinfo, data,
+			return set_sig_addr(skb, ct, ctinfo, data,
 					    &acf->destCallSignalAddress, 1);
 		return 0;
 	}
@@ -1548,7 +1548,7 @@
 }
 
 /****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, LocationRequest *lrq)
 {
@@ -1558,13 +1558,13 @@
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK)
-		return set_ras_addr(pskb, ct, ctinfo, data,
+		return set_ras_addr(skb, ct, ctinfo, data,
 				    &lrq->replyAddress, 1);
 	return 0;
 }
 
 /****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, LocationConfirm *lcf)
 {
@@ -1603,7 +1603,7 @@
 }
 
 /****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, InfoRequestResponse *irr)
 {
@@ -1615,7 +1615,7 @@
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
+		ret = set_ras_addr(skb, ct, ctinfo, data,
 				   &irr->rasAddress, 1);
 		if (ret < 0)
 			return -1;
@@ -1623,7 +1623,7 @@
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, data,
 					irr->callSignalAddress.item,
 					irr->callSignalAddress.count);
 		if (ret < 0)
@@ -1634,40 +1634,40 @@
 }
 
 /****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, RasMessage *ras)
 {
 	switch (ras->choice) {
 	case eRasMessage_gatekeeperRequest:
-		return process_grq(pskb, ct, ctinfo, data,
+		return process_grq(skb, ct, ctinfo, data,
 				   &ras->gatekeeperRequest);
 	case eRasMessage_gatekeeperConfirm:
-		return process_gcf(pskb, ct, ctinfo, data,
+		return process_gcf(skb, ct, ctinfo, data,
 				   &ras->gatekeeperConfirm);
 	case eRasMessage_registrationRequest:
-		return process_rrq(pskb, ct, ctinfo, data,
+		return process_rrq(skb, ct, ctinfo, data,
 				   &ras->registrationRequest);
 	case eRasMessage_registrationConfirm:
-		return process_rcf(pskb, ct, ctinfo, data,
+		return process_rcf(skb, ct, ctinfo, data,
 				   &ras->registrationConfirm);
 	case eRasMessage_unregistrationRequest:
-		return process_urq(pskb, ct, ctinfo, data,
+		return process_urq(skb, ct, ctinfo, data,
 				   &ras->unregistrationRequest);
 	case eRasMessage_admissionRequest:
-		return process_arq(pskb, ct, ctinfo, data,
+		return process_arq(skb, ct, ctinfo, data,
 				   &ras->admissionRequest);
 	case eRasMessage_admissionConfirm:
-		return process_acf(pskb, ct, ctinfo, data,
+		return process_acf(skb, ct, ctinfo, data,
 				   &ras->admissionConfirm);
 	case eRasMessage_locationRequest:
-		return process_lrq(pskb, ct, ctinfo, data,
+		return process_lrq(skb, ct, ctinfo, data,
 				   &ras->locationRequest);
 	case eRasMessage_locationConfirm:
-		return process_lcf(pskb, ct, ctinfo, data,
+		return process_lcf(skb, ct, ctinfo, data,
 				   &ras->locationConfirm);
 	case eRasMessage_infoRequestResponse:
-		return process_irr(pskb, ct, ctinfo, data,
+		return process_irr(skb, ct, ctinfo, data,
 				   &ras->infoRequestResponse);
 	default:
 		pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
@@ -1678,7 +1678,7 @@
 }
 
 /****************************************************************************/
-static int ras_help(struct sk_buff **pskb, unsigned int protoff,
+static int ras_help(struct sk_buff *skb, unsigned int protoff,
 		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	static RasMessage ras;
@@ -1686,12 +1686,12 @@
 	int datalen = 0;
 	int ret;
 
-	pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_ras: skblen = %u\n", skb->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Get UDP data */
-	data = get_udp_data(pskb, protoff, &datalen);
+	data = get_udp_data(skb, protoff, &datalen);
 	if (data == NULL)
 		goto accept;
 	pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
@@ -1707,7 +1707,7 @@
 	}
 
 	/* Process RAS message */
-	if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
+	if (process_ras(skb, ct, ctinfo, &data, &ras) < 0)
 		goto drop;
 
       accept:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1562ca9..dfaed4b 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -30,7 +30,7 @@
 static char *irc_buffer;
 static DEFINE_SPINLOCK(irc_buffer_lock);
 
-unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				unsigned int matchoff,
 				unsigned int matchlen,
@@ -89,7 +89,7 @@
 	return 0;
 }
 
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	unsigned int dataoff;
@@ -116,22 +116,22 @@
 		return NF_ACCEPT;
 
 	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	/* No data? */
 	dataoff = protoff + th->doff*4;
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
 	spin_lock_bh(&irc_buffer_lock);
-	ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff,
+	ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
 				    irc_buffer);
 	BUG_ON(ib_ptr == NULL);
 
 	data = ib_ptr;
-	data_limit = ib_ptr + (*pskb)->len - dataoff;
+	data_limit = ib_ptr + skb->len - dataoff;
 
 	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
 	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -143,7 +143,7 @@
 		data += 5;
 		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
 
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n",
 			 NIPQUAD(iph->saddr), ntohs(th->source),
 			 NIPQUAD(iph->daddr), ntohs(th->dest));
@@ -193,7 +193,7 @@
 
 			nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
 			if (nf_nat_irc && ct->status & IPS_NAT_MASK)
-				ret = nf_nat_irc(pskb, ctinfo,
+				ret = nf_nat_irc(skb, ctinfo,
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 1d59fab..9810d81 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -42,17 +42,17 @@
 module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	struct nf_conntrack_expect *exp;
-	struct iphdr *iph = ip_hdr(*pskb);
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct in_device *in_dev;
 	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
-	if ((*pskb)->sk == NULL)
+	if (skb->sk == NULL)
 		goto out;
 	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
 		goto out;
@@ -91,7 +91,7 @@
 	nf_ct_expect_related(exp);
 	nf_ct_expect_put(exp);
 
-	nf_ct_refresh(ct, *pskb, timeout * HZ);
+	nf_ct_refresh(ct, skb, timeout * HZ);
 out:
 	return NF_ACCEPT;
 }
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b080419..099b6df 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -41,14 +41,14 @@
 static DEFINE_SPINLOCK(nf_pptp_lock);
 
 int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
 			     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			     struct PptpControlHeader *ctlh,
 			     union pptp_ctrl_union *pptpReq) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
 
 int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
 			    struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			    struct PptpControlHeader *ctlh,
 			    union pptp_ctrl_union *pptpReq) __read_mostly;
@@ -254,7 +254,7 @@
 }
 
 static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
 		 struct PptpControlHeader *ctlh,
 		 union pptp_ctrl_union *pptpReq,
 		 unsigned int reqlen,
@@ -367,7 +367,7 @@
 
 	nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
 	if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
-		return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
+		return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
 	return NF_ACCEPT;
 
 invalid:
@@ -380,7 +380,7 @@
 }
 
 static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
 		  struct PptpControlHeader *ctlh,
 		  union pptp_ctrl_union *pptpReq,
 		  unsigned int reqlen,
@@ -462,7 +462,7 @@
 
 	nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
 	if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
-		return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
+		return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
 	return NF_ACCEPT;
 
 invalid:
@@ -492,7 +492,7 @@
 
 /* track caller id inside control connection, call expect_related */
 static int
-conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
+conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
 		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 
 {
@@ -502,7 +502,7 @@
 	struct pptp_pkt_hdr _pptph, *pptph;
 	struct PptpControlHeader _ctlh, *ctlh;
 	union pptp_ctrl_union _pptpReq, *pptpReq;
-	unsigned int tcplen = (*pskb)->len - protoff;
+	unsigned int tcplen = skb->len - protoff;
 	unsigned int datalen, reqlen, nexthdr_off;
 	int oldsstate, oldcstate;
 	int ret;
@@ -514,12 +514,12 @@
 		return NF_ACCEPT;
 
 	nexthdr_off = protoff;
-	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+	tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
 	BUG_ON(!tcph);
 	nexthdr_off += tcph->doff * 4;
 	datalen = tcplen - tcph->doff * 4;
 
-	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+	pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph);
 	if (!pptph) {
 		pr_debug("no full PPTP header, can't track\n");
 		return NF_ACCEPT;
@@ -534,7 +534,7 @@
 		return NF_ACCEPT;
 	}
 
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh);
 	if (!ctlh)
 		return NF_ACCEPT;
 	nexthdr_off += sizeof(_ctlh);
@@ -547,7 +547,7 @@
 	if (reqlen > sizeof(*pptpReq))
 		reqlen = sizeof(*pptpReq);
 
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq);
 	if (!pptpReq)
 		return NF_ACCEPT;
 
@@ -560,11 +560,11 @@
 	 * established from PNS->PAC.  However, RFC makes no guarantee */
 	if (dir == IP_CT_DIR_ORIGINAL)
 		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+		ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
 					ctinfo);
 	else
 		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+		ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
 				       ctinfo);
 	pr_debug("sstate: %d->%d, cstate: %d->%d\n",
 		 oldsstate, info->sstate, oldcstate, info->cstate);
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 355d371..b5a16c6 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -56,7 +56,7 @@
 	/* other fields aren't interesting for conntrack */
 };
 
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
 		unsigned int protoff,
 		struct nf_conn *ct,
 		enum ip_conntrack_info ctinfo)
@@ -80,19 +80,19 @@
 		return NF_ACCEPT;
 
 	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	/* No data? */
 	dataoff = protoff + th->doff * 4;
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
-	datalen = (*pskb)->len - dataoff;
+	datalen = skb->len - dataoff;
 
 	spin_lock_bh(&nf_sane_lock);
-	sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer);
+	sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
 	BUG_ON(sb_ptr == NULL);
 
 	if (dir == IP_CT_DIR_ORIGINAL) {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d449fa4..8f8b5a4 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -36,13 +36,13 @@
 module_param(sip_timeout, uint, 0600);
 MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				struct nf_conn *ct,
 				const char **dptr) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
-unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				struct nf_conntrack_expect *exp,
 				const char *dptr) __read_mostly;
@@ -363,7 +363,7 @@
 }
 EXPORT_SYMBOL_GPL(ct_sip_get_info);
 
-static int set_expected_rtp(struct sk_buff **pskb,
+static int set_expected_rtp(struct sk_buff *skb,
 			    struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    union nf_conntrack_address *addr,
@@ -385,7 +385,7 @@
 
 	nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook);
 	if (nf_nat_sdp && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp(pskb, ctinfo, exp, dptr);
+		ret = nf_nat_sdp(skb, ctinfo, exp, dptr);
 	else {
 		if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
@@ -397,7 +397,7 @@
 	return ret;
 }
 
-static int sip_help(struct sk_buff **pskb,
+static int sip_help(struct sk_buff *skb,
 		    unsigned int protoff,
 		    struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo)
@@ -414,13 +414,13 @@
 
 	/* No Data ? */
 	dataoff = protoff + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
-	nf_ct_refresh(ct, *pskb, sip_timeout * HZ);
+	nf_ct_refresh(ct, skb, sip_timeout * HZ);
 
-	if (!skb_is_nonlinear(*pskb))
-		dptr = (*pskb)->data + dataoff;
+	if (!skb_is_nonlinear(skb))
+		dptr = skb->data + dataoff;
 	else {
 		pr_debug("Copy of skbuff not supported yet.\n");
 		goto out;
@@ -428,13 +428,13 @@
 
 	nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
 	if (nf_nat_sip && ct->status & IPS_NAT_MASK) {
-		if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) {
+		if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) {
 			ret = NF_DROP;
 			goto out;
 		}
 	}
 
-	datalen = (*pskb)->len - dataoff;
+	datalen = skb->len - dataoff;
 	if (datalen < sizeof("SIP/2.0 200") - 1)
 		goto out;
 
@@ -464,7 +464,7 @@
 				ret = NF_DROP;
 				goto out;
 			}
-			ret = set_expected_rtp(pskb, ct, ctinfo, &addr,
+			ret = set_expected_rtp(skb, ct, ctinfo, &addr,
 					       htons(port), dptr);
 		}
 	}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index cc19506..e894aa1f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -29,12 +29,12 @@
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
 
-unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
 				 enum ip_conntrack_info ctinfo,
 				 struct nf_conntrack_expect *exp) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
 
-static int tftp_help(struct sk_buff **pskb,
+static int tftp_help(struct sk_buff *skb,
 		     unsigned int protoff,
 		     struct nf_conn *ct,
 		     enum ip_conntrack_info ctinfo)
@@ -46,7 +46,7 @@
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
 	typeof(nf_nat_tftp_hook) nf_nat_tftp;
 
-	tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr),
+	tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
 				 sizeof(_tftph), &_tftph);
 	if (tfh == NULL)
 		return NF_ACCEPT;
@@ -70,7 +70,7 @@
 
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
-			ret = nf_nat_tftp(pskb, ctinfo, exp);
+			ret = nf_nat_tftp(skb, ctinfo, exp);
 		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
 		nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0df7fff..196269c 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,7 +14,7 @@
 
 /* core.c */
 extern unsigned int nf_iterate(struct list_head *head,
-				struct sk_buff **skb,
+				struct sk_buff *skb,
 				int hook,
 				const struct net_device *indev,
 				const struct net_device *outdev,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a481a34..0cef143 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -256,14 +256,14 @@
 
 	if (verdict == NF_ACCEPT) {
 		afinfo = nf_get_afinfo(info->pf);
-		if (!afinfo || afinfo->reroute(&skb, info) < 0)
+		if (!afinfo || afinfo->reroute(skb, info) < 0)
 			verdict = NF_DROP;
 	}
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
 		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-				     &skb, info->hook,
+				     skb, info->hook,
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
 	}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49f0480..3ceeffc 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -617,6 +617,7 @@
 nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 {
 	int diff;
+	int err;
 
 	diff = data_len - e->skb->len;
 	if (diff < 0) {
@@ -626,25 +627,18 @@
 		if (data_len > 0xFFFF)
 			return -EINVAL;
 		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-
-			newskb = skb_copy_expand(e->skb,
-						 skb_headroom(e->skb),
-						 diff,
-						 GFP_ATOMIC);
-			if (newskb == NULL) {
+			err = pskb_expand_head(e->skb, 0,
+					       diff - skb_tailroom(e->skb),
+					       GFP_ATOMIC);
+			if (err) {
 				printk(KERN_WARNING "nf_queue: OOM "
 				      "in mangle, dropping packet\n");
-				return -ENOMEM;
+				return err;
 			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(&e->skb, data_len))
+	if (!skb_make_writable(e->skb, data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, data, data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 07a1b96..77eeae6 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -36,7 +36,7 @@
 {
 	const struct xt_classify_target_info *clinfo = targinfo;
 
-	(*pskb)->priority = clinfo->priority;
+	skb->priority = clinfo->priority;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 7043c27..8cc324b 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -34,7 +34,7 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -48,28 +48,28 @@
 	u_int32_t mark;
 	u_int32_t newmark;
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (ct) {
 		switch(markinfo->mode) {
 		case XT_CONNMARK_SET:
 			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
 			if (newmark != ct->mark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, *pskb);
+				nf_conntrack_event_cache(IPCT_MARK, skb);
 			}
 			break;
 		case XT_CONNMARK_SAVE:
 			newmark = (ct->mark & ~markinfo->mask) |
-				  ((*pskb)->mark & markinfo->mask);
+				  (skb->mark & markinfo->mask);
 			if (ct->mark != newmark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, *pskb);
+				nf_conntrack_event_cache(IPCT_MARK, skb);
 			}
 			break;
 		case XT_CONNMARK_RESTORE:
-			mark = (*pskb)->mark;
+			mark = skb->mark;
 			diff = (ct->mark ^ mark) & markinfo->mask;
-			(*pskb)->mark = mark ^ diff;
+			skb->mark = mark ^ diff;
 			break;
 		}
 	}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 63d7313..021b5c8 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -61,12 +61,11 @@
 	}
 }
 
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
 			   const void *targinfo)
 {
-	struct sk_buff *skb = *pskb;
 	const struct xt_connsecmark_target_info *info = targinfo;
 
 	switch (info->mode) {
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 798ab73..6322a93 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -25,7 +25,7 @@
 MODULE_ALIAS("ipt_DSCP");
 MODULE_ALIAS("ip6t_DSCP");
 
-static unsigned int target(struct sk_buff **pskb,
+static unsigned int target(struct sk_buff *skb,
 			   const struct net_device *in,
 			   const struct net_device *out,
 			   unsigned int hooknum,
@@ -33,20 +33,20 @@
 			   const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 
-		ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+		ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 
 	}
 	return XT_CONTINUE;
 }
 
-static unsigned int target6(struct sk_buff **pskb,
+static unsigned int target6(struct sk_buff *skb,
 			    const struct net_device *in,
 			    const struct net_device *out,
 			    unsigned int hooknum,
@@ -54,13 +54,13 @@
 			    const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
-		if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+		if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 			return NF_DROP;
 
-		ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+		ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index f30fe0b..bc6503d 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -22,7 +22,7 @@
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-target_v0(struct sk_buff **pskb,
+target_v0(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -31,12 +31,12 @@
 {
 	const struct xt_mark_target_info *markinfo = targinfo;
 
-	(*pskb)->mark = markinfo->mark;
+	skb->mark = markinfo->mark;
 	return XT_CONTINUE;
 }
 
 static unsigned int
-target_v1(struct sk_buff **pskb,
+target_v1(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -52,15 +52,15 @@
 		break;
 
 	case XT_MARK_AND:
-		mark = (*pskb)->mark & markinfo->mark;
+		mark = skb->mark & markinfo->mark;
 		break;
 
 	case XT_MARK_OR:
-		mark = (*pskb)->mark | markinfo->mark;
+		mark = skb->mark | markinfo->mark;
 		break;
 	}
 
-	(*pskb)->mark = mark;
+	skb->mark = mark;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index d3594c7..9fb449f 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -20,7 +20,7 @@
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_target(struct sk_buff **pskb,
+nflog_target(struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     unsigned int hooknum, const struct xt_target *target,
 	     const void *targinfo)
@@ -33,7 +33,7 @@
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(target->family, hooknum, *pskb, in, out, &li,
+	nf_log_packet(target->family, hooknum, skb, in, out, &li,
 		      "%s", info->prefix);
 	return XT_CONTINUE;
 }
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 13f59f3..c3984e9 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,7 +24,7 @@
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index fec1aef..4976ce1 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -12,7 +12,7 @@
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -20,16 +20,16 @@
        const void *targinfo)
 {
 	/* Previously seen (loopback)? Ignore. */
-	if ((*pskb)->nfct != NULL)
+	if (skb->nfct != NULL)
 		return XT_CONTINUE;
 
 	/* Attach fake conntrack entry.
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	(*pskb)->nfct = &nf_conntrack_untracked.ct_general;
-	(*pskb)->nfctinfo = IP_CT_NEW;
-	nf_conntrack_get((*pskb)->nfct);
+	skb->nfct = &nf_conntrack_untracked.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
 
 	return XT_CONTINUE;
 }
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index c83779a..235806e 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -28,7 +28,7 @@
 
 static u8 mode;
 
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
 			   const void *targinfo)
@@ -47,7 +47,7 @@
 		BUG();
 	}
 
-	(*pskb)->secmark = secmark;
+	skb->secmark = secmark;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index d40f7e4..07435a6 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -39,7 +39,7 @@
 }
 
 static int
-tcpmss_mangle_packet(struct sk_buff **pskb,
+tcpmss_mangle_packet(struct sk_buff *skb,
 		     const struct xt_tcpmss_info *info,
 		     unsigned int tcphoff,
 		     unsigned int minlen)
@@ -50,11 +50,11 @@
 	u16 newmss;
 	u8 *opt;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return -1;
 
-	tcplen = (*pskb)->len - tcphoff;
-	tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+	tcplen = skb->len - tcphoff;
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 
 	/* Since it passed flags test in tcp match, we know it is is
 	   not a fragment, and has data >= tcp header length.  SYN
@@ -64,19 +64,19 @@
 	if (tcplen != tcph->doff*4) {
 		if (net_ratelimit())
 			printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
-			       (*pskb)->len);
+			       skb->len);
 		return -1;
 	}
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
-		if (dst_mtu((*pskb)->dst) <= minlen) {
+		if (dst_mtu(skb->dst) <= minlen) {
 			if (net_ratelimit())
 				printk(KERN_ERR "xt_TCPMSS: "
 				       "unknown or invalid path-MTU (%u)\n",
-				       dst_mtu((*pskb)->dst));
+				       dst_mtu(skb->dst));
 			return -1;
 		}
-		newmss = dst_mtu((*pskb)->dst) - minlen;
+		newmss = dst_mtu(skb->dst) - minlen;
 	} else
 		newmss = info->mss;
 
@@ -95,7 +95,7 @@
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = newmss & 0x00ff;
 
-			nf_proto_csum_replace2(&tcph->check, *pskb,
+			nf_proto_csum_replace2(&tcph->check, skb,
 					       htons(oldmss), htons(newmss), 0);
 			return 0;
 		}
@@ -104,57 +104,53 @@
 	/*
 	 * MSS Option not found ?! add it..
 	 */
-	if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
-		struct sk_buff *newskb;
-
-		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
-					 TCPOLEN_MSS, GFP_ATOMIC);
-		if (!newskb)
+	if (skb_tailroom(skb) < TCPOLEN_MSS) {
+		if (pskb_expand_head(skb, 0,
+				     TCPOLEN_MSS - skb_tailroom(skb),
+				     GFP_ATOMIC))
 			return -1;
-		kfree_skb(*pskb);
-		*pskb = newskb;
-		tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+		tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 	}
 
-	skb_put((*pskb), TCPOLEN_MSS);
+	skb_put(skb, TCPOLEN_MSS);
 
 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	nf_proto_csum_replace2(&tcph->check, *pskb,
+	nf_proto_csum_replace2(&tcph->check, skb,
 			       htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = newmss & 0x00ff;
 
-	nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
+	nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
 
 	oldval = ((__be16 *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	nf_proto_csum_replace2(&tcph->check, *pskb,
+	nf_proto_csum_replace2(&tcph->check, skb,
 				oldval, ((__be16 *)tcph)[6], 0);
 	return TCPOLEN_MSS;
 }
 
 static unsigned int
-xt_tcpmss_target4(struct sk_buff **pskb,
+xt_tcpmss_target4(struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4,
+	ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		newlen = htons(ntohs(iph->tot_len) + ret);
 		nf_csum_replace2(&iph->check, iph->tot_len, newlen);
 		iph->tot_len = newlen;
@@ -164,30 +160,30 @@
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-xt_tcpmss_target6(struct sk_buff **pskb,
+xt_tcpmss_target6(struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
 	int tcphoff;
 	int ret;
 
 	nexthdr = ipv6h->nexthdr;
-	tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr);
+	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
 	if (tcphoff < 0) {
 		WARN_ON(1);
 		return NF_DROP;
 	}
-	ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff,
+	ret = tcpmss_mangle_packet(skb, targinfo, tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		ipv6h = ipv6_hdr(*pskb);
+		ipv6h = ipv6_hdr(skb);
 		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 4df2ded..26c5d08 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -10,14 +10,14 @@
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
        const void *targinfo)
 {
-	(*pskb)->nf_trace = 1;
+	skb->nf_trace = 1;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c776bcd..98e313e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1378,6 +1378,8 @@
 		nl_table[unit].cb_mutex = cb_mutex;
 		nl_table[unit].module = module;
 		nl_table[unit].registered = 1;
+	} else {
+		kfree(listeners);
 	}
 	netlink_table_ungrab();
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6b407ec..fa006e0 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -202,11 +202,7 @@
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
 	 from earlier kernels */
-
-	/* iptables targets take a double skb pointer in case the skb
-	 * needs to be replaced. We don't own the skb, so this must not
-	 * happen. The pskb_expand_head above should make sure of this */
-	ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+	ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
 						   ipt->tcfi_hook,
 						   ipt->tcfi_t->u.kernel.target,
 						   ipt->tcfi_t->data);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 2d32fd2..3f8335e 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -205,20 +205,19 @@
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
 static unsigned int
-ing_hook(unsigned int hook, struct sk_buff **pskb,
+ing_hook(unsigned int hook, struct sk_buff *skb,
 			     const struct net_device *indev,
 			     const struct net_device *outdev,
 			     int (*okfn)(struct sk_buff *))
 {
 
 	struct Qdisc *q;
-	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
 	int fwres=NF_ACCEPT;
 
 	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
 		skb->sk ? "(owned)" : "(unowned)",
-		skb->dev ? (*pskb)->dev->name : "(no dev)",
+		skb->dev ? skb->dev->name : "(no dev)",
 		skb->len);
 
 	if (dev->qdisc_ingress) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9de3dda..eb4deaf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -954,9 +954,9 @@
 	.flags         = SCTP_PROTOSW_FLAG,
 };
 
-static int sctp6_rcv(struct sk_buff **pskb)
+static int sctp6_rcv(struct sk_buff *skb)
 {
-	return sctp_rcv(*pskb) ? -1 : 0;
+	return sctp_rcv(skb) ? -1 : 0;
 }
 
 static struct inet6_protocol sctpv6_protocol = {
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 8ebfc4d..5c69a72 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,6 +5,7 @@
 
 obj-$(CONFIG_SUNRPC) += sunrpc.o
 obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
 
 sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    auth.o auth_null.o auth_unix.o \
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 42b3220..8bd074d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -42,7 +42,7 @@
 {
 	u8 *ptr;
 	u8 pad;
-	int len = buf->len;
+	size_t len = buf->len;
 
 	if (len <= buf->head[0].iov_len) {
 		pad = *(u8 *)(buf->head[0].iov_base + len - 1);
@@ -53,9 +53,9 @@
 	} else
 		len -= buf->head[0].iov_len;
 	if (len <= buf->page_len) {
-		int last = (buf->page_base + len - 1)
+		unsigned int last = (buf->page_base + len - 1)
 					>>PAGE_CACHE_SHIFT;
-		int offset = (buf->page_base + len - 1)
+		unsigned int offset = (buf->page_base + len - 1)
 					& (PAGE_CACHE_SIZE - 1);
 		ptr = kmap_atomic(buf->pages[last], KM_USER0);
 		pad = *(ptr + offset);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52429b1..76be83e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -127,7 +127,14 @@
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_auth		*auth;
 	int err;
-	int len;
+	size_t len;
+
+	/* sanity check the name before trying to print it */
+	err = -EINVAL;
+	len = strlen(servname);
+	if (len > RPC_MAXNETNAMELEN)
+		goto out_no_rpciod;
+	len++;
 
 	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
 			program->name, servname, xprt);
@@ -148,7 +155,6 @@
 	clnt->cl_parent = clnt;
 
 	clnt->cl_server = clnt->cl_inline_name;
-	len = strlen(servname) + 1;
 	if (len > sizeof(clnt->cl_inline_name)) {
 		char *buf = kmalloc(len, GFP_KERNEL);
 		if (buf != 0)
@@ -234,8 +240,8 @@
 {
 	struct rpc_xprt *xprt;
 	struct rpc_clnt *clnt;
-	struct rpc_xprtsock_create xprtargs = {
-		.proto = args->protocol,
+	struct xprt_create xprtargs = {
+		.ident = args->protocol,
 		.srcaddr = args->saddress,
 		.dstaddr = args->address,
 		.addrlen = args->addrsize,
@@ -253,7 +259,7 @@
 	 */
 	if (args->servername == NULL) {
 		struct sockaddr_in *addr =
-					(struct sockaddr_in *) &args->address;
+					(struct sockaddr_in *) args->address;
 		snprintf(servername, sizeof(servername), NIPQUAD_FMT,
 			NIPQUAD(addr->sin_addr.s_addr));
 		args->servername = servername;
@@ -269,9 +275,6 @@
 	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
 		xprt->resvport = 0;
 
-	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
-			args->program->name, args->servername, xprt);
-
 	clnt = rpc_new_client(xprt, args->servername, args->program,
 				args->version, args->authflavor);
 	if (IS_ERR(clnt))
@@ -439,7 +442,7 @@
  */
 struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
 				      struct rpc_program *program,
-				      int vers)
+				      u32 vers)
 {
 	struct rpc_clnt *clnt;
 	struct rpc_version *version;
@@ -843,8 +846,7 @@
 	dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
 
 	if (RPC_IS_ASYNC(task) || !signalled()) {
-		xprt_release(task);
-		task->tk_action = call_reserve;
+		task->tk_action = call_allocate;
 		rpc_delay(task, HZ>>4);
 		return;
 	}
@@ -871,6 +873,7 @@
 	buf->head[0].iov_len = len;
 	buf->tail[0].iov_len = 0;
 	buf->page_len = 0;
+	buf->flags = 0;
 	buf->len = 0;
 	buf->buflen = len;
 }
@@ -937,7 +940,7 @@
 static void
 call_bind_status(struct rpc_task *task)
 {
-	int status = -EACCES;
+	int status = -EIO;
 
 	if (task->tk_status >= 0) {
 		dprint_status(task);
@@ -947,9 +950,20 @@
 	}
 
 	switch (task->tk_status) {
+	case -EAGAIN:
+		dprintk("RPC: %5u rpcbind waiting for another request "
+				"to finish\n", task->tk_pid);
+		/* avoid busy-waiting here -- could be a network outage. */
+		rpc_delay(task, 5*HZ);
+		goto retry_timeout;
 	case -EACCES:
 		dprintk("RPC: %5u remote rpcbind: RPC program/version "
 				"unavailable\n", task->tk_pid);
+		/* fail immediately if this is an RPC ping */
+		if (task->tk_msg.rpc_proc->p_proc == 0) {
+			status = -EOPNOTSUPP;
+			break;
+		}
 		rpc_delay(task, 3*HZ);
 		goto retry_timeout;
 	case -ETIMEDOUT:
@@ -957,6 +971,7 @@
 				task->tk_pid);
 		goto retry_timeout;
 	case -EPFNOSUPPORT:
+		/* server doesn't support any rpcbind version we know of */
 		dprintk("RPC: %5u remote rpcbind service unavailable\n",
 				task->tk_pid);
 		break;
@@ -969,7 +984,6 @@
 	default:
 		dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
 				task->tk_pid, -task->tk_status);
-		status = -EIO;
 	}
 
 	rpc_exit(task, status);
@@ -1257,7 +1271,6 @@
 {
 	dprint_status(task);
 
-	xprt_release(task);	/* Must do to obtain new XID */
 	task->tk_action = call_refreshresult;
 	task->tk_status = 0;
 	task->tk_client->cl_stats->rpcauthrefresh++;
@@ -1375,6 +1388,8 @@
 			dprintk("RPC: %5u %s: retry stale creds\n",
 					task->tk_pid, __FUNCTION__);
 			rpcauth_invalcred(task);
+			/* Ensure we obtain a new XID! */
+			xprt_release(task);
 			task->tk_action = call_refresh;
 			goto out_retry;
 		case RPC_AUTH_BADCRED:
@@ -1523,13 +1538,18 @@
 		spin_lock(&clnt->cl_lock);
 		list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
 			const char *rpc_waitq = "none";
+			int proc;
+
+			if (t->tk_msg.rpc_proc)
+				proc = t->tk_msg.rpc_proc->p_proc;
+			else
+				proc = -1;
 
 			if (RPC_IS_QUEUED(t))
 				rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
 
 			printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
-				t->tk_pid,
-				(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+				t->tk_pid, proc,
 				t->tk_flags, t->tk_status,
 				t->tk_client,
 				(t->tk_client ? t->tk_client->cl_prog : 0),
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 669e12a..c8433e8 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/dnotify.h>
+#include <linux/fsnotify.h>
 #include <linux/kernel.h>
 
 #include <asm/ioctls.h>
@@ -329,6 +329,7 @@
 			clnt->cl_prog, clnt->cl_vers);
 	seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
 	seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
+	seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
 	return 0;
 }
 
@@ -585,6 +586,7 @@
 		if (S_ISDIR(mode))
 			inc_nlink(dir);
 		d_add(dentry, inode);
+		fsnotify_create(dir, dentry);
 	}
 	mutex_unlock(&dir->i_mutex);
 	return 0;
@@ -606,7 +608,7 @@
 	inode->i_ino = iunique(dir->i_sb, 100);
 	d_instantiate(dentry, inode);
 	inc_nlink(dir);
-	inode_dir_notify(dir, DN_CREATE);
+	fsnotify_mkdir(dir, dentry);
 	return 0;
 out_err:
 	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -748,7 +750,7 @@
 	rpci->flags = flags;
 	rpci->ops = ops;
 	rpci->nkern_readwriters = 1;
-	inode_dir_notify(dir, DN_CREATE);
+	fsnotify_create(dir, dentry);
 	dget(dentry);
 out:
 	mutex_unlock(&dir->i_mutex);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d1740db..a05493a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,11 +16,14 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_BIND
@@ -91,26 +94,6 @@
 #define RPCB_MAXADDRLEN		(128u)
 
 /*
- * r_netid
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 the value of r_netid is the string "tcp".  For UDP
- * over IPv4 the value of r_netid is the string "udp".
- *
- * ...
- *
- * For TCP over IPv6 the value of r_netid is the string "tcp6".  For UDP
- * over IPv6 the value of r_netid is the string "udp6".
- */
-#define RPCB_NETID_UDP	"\165\144\160"		/* "udp" */
-#define RPCB_NETID_TCP	"\164\143\160"		/* "tcp" */
-#define RPCB_NETID_UDP6	"\165\144\160\066"	/* "udp6" */
-#define RPCB_NETID_TCP6	"\164\143\160\066"	/* "tcp6" */
-
-#define RPCB_MAXNETIDLEN	(4u)
-
-/*
  * r_owner
  *
  * The "owner" is allowed to unset a service in the rpcbind database.
@@ -120,7 +103,7 @@
 #define RPCB_MAXOWNERLEN	sizeof(RPCB_OWNER_STRING)
 
 static void			rpcb_getport_done(struct rpc_task *, void *);
-extern struct rpc_program	rpcb_program;
+static struct rpc_program	rpcb_program;
 
 struct rpcbind_args {
 	struct rpc_xprt *	r_xprt;
@@ -137,10 +120,13 @@
 static struct rpc_procinfo rpcb_procedures2[];
 static struct rpc_procinfo rpcb_procedures3[];
 
-static struct rpcb_info {
+struct rpcb_info {
 	int			rpc_vers;
 	struct rpc_procinfo *	rpc_proc;
-} rpcb_next_version[];
+};
+
+static struct rpcb_info rpcb_next_version[];
+static struct rpcb_info rpcb_next_version6[];
 
 static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
 {
@@ -190,7 +176,17 @@
 				   RPC_CLNT_CREATE_INTR),
 	};
 
-	((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+	switch (srvaddr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
+		break;
+	default:
+		return NULL;
+	}
+
 	if (!privileged)
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 	return rpc_create(&args);
@@ -234,7 +230,7 @@
 			prog, vers, prot, port);
 
 	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
-					IPPROTO_UDP, 2, 1);
+					XPRT_TRANSPORT_UDP, 2, 1);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -316,6 +312,7 @@
 	struct rpc_task	*child;
 	struct sockaddr addr;
 	int status;
+	struct rpcb_info *info;
 
 	dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
 		task->tk_pid, __FUNCTION__,
@@ -325,7 +322,7 @@
 	BUG_ON(clnt->cl_parent != clnt);
 
 	if (xprt_test_and_set_binding(xprt)) {
-		status = -EACCES;		/* tell caller to check again */
+		status = -EAGAIN;	/* tell caller to check again */
 		dprintk("RPC: %5u %s: waiting for another binder\n",
 			task->tk_pid, __FUNCTION__);
 		goto bailout_nowake;
@@ -343,18 +340,43 @@
 		goto bailout_nofree;
 	}
 
-	if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+	rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+
+	/* Don't ever use rpcbind v2 for AF_INET6 requests */
+	switch (addr.sa_family) {
+	case AF_INET:
+		info = rpcb_next_version;
+		break;
+	case AF_INET6:
+		info = rpcb_next_version6;
+		break;
+	default:
+		status = -EAFNOSUPPORT;
+		dprintk("RPC: %5u %s: bad address family\n",
+				task->tk_pid, __FUNCTION__);
+		goto bailout_nofree;
+	}
+	if (info[xprt->bind_index].rpc_proc == NULL) {
 		xprt->bind_index = 0;
-		status = -EACCES;	/* tell caller to try again later */
+		status = -EPFNOSUPPORT;
 		dprintk("RPC: %5u %s: no more getport versions available\n",
 			task->tk_pid, __FUNCTION__);
 		goto bailout_nofree;
 	}
-	bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+	bind_version = info[xprt->bind_index].rpc_vers;
 
 	dprintk("RPC: %5u %s: trying rpcbind version %u\n",
 		task->tk_pid, __FUNCTION__, bind_version);
 
+	rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+				bind_version, 0);
+	if (IS_ERR(rpcb_clnt)) {
+		status = PTR_ERR(rpcb_clnt);
+		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
+			task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
+		goto bailout_nofree;
+	}
+
 	map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
 	if (!map) {
 		status = -ENOMEM;
@@ -367,28 +389,19 @@
 	map->r_prot = xprt->prot;
 	map->r_port = 0;
 	map->r_xprt = xprt_get(xprt);
-	map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
-						   RPCB_NETID_UDP;
-	memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
-			sizeof(map->r_addr));
+	map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+	memcpy(map->r_addr,
+	       rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
+	       sizeof(map->r_addr));
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
-	rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
-	rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
-	if (IS_ERR(rpcb_clnt)) {
-		status = PTR_ERR(rpcb_clnt);
-		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
-			task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
-		goto bailout;
-	}
-
 	child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;
 		dprintk("RPC: %5u %s: rpc_run_task failed\n",
 			task->tk_pid, __FUNCTION__);
-		goto bailout_nofree;
+		goto bailout;
 	}
 	rpc_put_task(child);
 
@@ -403,6 +416,7 @@
 bailout_nowake:
 	task->tk_status = status;
 }
+EXPORT_SYMBOL_GPL(rpcb_getport_async);
 
 /*
  * Rpcbind child task calls this callback via tk_exit.
@@ -413,6 +427,10 @@
 	struct rpc_xprt *xprt = map->r_xprt;
 	int status = child->tk_status;
 
+	/* Garbage reply: retry with a lesser rpcbind version */
+	if (status == -EIO)
+		status = -EPROTONOSUPPORT;
+
 	/* rpcbind server doesn't support this rpcbind protocol version */
 	if (status == -EPROTONOSUPPORT)
 		xprt->bind_index++;
@@ -490,16 +508,24 @@
 			       unsigned short *portp)
 {
 	char *addr;
-	int addr_len, c, i, f, first, val;
+	u32 addr_len;
+	int c, i, f, first, val;
 
 	*portp = 0;
-	addr_len = (unsigned int) ntohl(*p++);
-	if (addr_len > RPCB_MAXADDRLEN)			/* sanity */
-		return -EINVAL;
+	addr_len = ntohl(*p++);
 
-	dprintk("RPC:       rpcb_decode_getaddr returned string: '%s'\n",
-			(char *) p);
+	/*
+	 * Simple sanity check.  The smallest possible universal
+	 * address is an IPv4 address string containing 11 bytes.
+	 */
+	if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+		goto out_err;
 
+	/*
+	 * Start at the end and walk backwards until the first dot
+	 * is encountered.  When the second dot is found, we have
+	 * both parts of the port number.
+	 */
 	addr = (char *)p;
 	val = 0;
 	first = 1;
@@ -521,8 +547,19 @@
 		}
 	}
 
+	/*
+	 * Simple sanity check.  If we never saw a dot in the reply,
+	 * then this was probably just garbage.
+	 */
+	if (first)
+		goto out_err;
+
 	dprintk("RPC:       rpcb_decode_getaddr port=%u\n", *portp);
 	return 0;
+
+out_err:
+	dprintk("RPC:       rpcbind server returned malformed reply\n");
+	return -EIO;
 }
 
 #define RPCB_program_sz		(1u)
@@ -531,7 +568,7 @@
 #define RPCB_port_sz		(1u)
 #define RPCB_boolean_sz		(1u)
 
-#define RPCB_netid_sz		(1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
+#define RPCB_netid_sz		(1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
 #define RPCB_addr_sz		(1+XDR_QUADLEN(RPCB_MAXADDRLEN))
 #define RPCB_ownerstring_sz	(1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
 
@@ -593,6 +630,14 @@
 	{ 0, NULL },
 };
 
+static struct rpcb_info rpcb_next_version6[] = {
+#ifdef CONFIG_SUNRPC_BIND34
+	{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
+	{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+#endif
+	{ 0, NULL },
+};
+
 static struct rpc_version rpcb_version2 = {
 	.number		= 2,
 	.nrprocs	= RPCB_HIGHPROC_2,
@@ -621,7 +666,7 @@
 
 static struct rpc_stat rpcb_stats;
 
-struct rpc_program rpcb_program = {
+static struct rpc_program rpcb_program = {
 	.name		= "rpcbind",
 	.number		= RPCBIND_PROGRAM,
 	.nrvers		= ARRAY_SIZE(rpcb_version),
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 954d7ec..3c773c5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -777,6 +777,7 @@
 			task->tk_pid, size, buf);
 	return &buf->data;
 }
+EXPORT_SYMBOL_GPL(rpc_malloc);
 
 /**
  * rpc_free - free buffer allocated via rpc_malloc
@@ -802,6 +803,7 @@
 	else
 		kfree(buf);
 }
+EXPORT_SYMBOL_GPL(rpc_free);
 
 /*
  * Creation and deletion of RPC task structures
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1d377d1..97ac45f0 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -34,6 +34,7 @@
 	desc->offset += len;
 	return len;
 }
+EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
 
 /**
  * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -137,6 +138,7 @@
 out:
 	return copied;
 }
+EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
 
 /**
  * csum_partial_copy_to_xdr - checksum and copy data
@@ -179,3 +181,4 @@
 		return -1;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 384c4ad..33d89e8 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -20,7 +20,7 @@
 #include <linux/sunrpc/auth.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
-
+#include <linux/sunrpc/xprtsock.h>
 
 /* RPC scheduler */
 EXPORT_SYMBOL(rpc_execute);
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index 8142fdb..31becbf 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -17,6 +17,7 @@
 
 #include <linux/types.h>
 #include <linux/unistd.h>
+#include <linux/module.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -40,6 +41,7 @@
 		rt->ntimeouts[i] = 0;
 	}
 }
+EXPORT_SYMBOL_GPL(rpc_init_rtt);
 
 /*
  * NB: When computing the smoothed RTT and standard deviation,
@@ -75,6 +77,7 @@
 	if (*sdrtt < RPC_RTO_MIN)
 		*sdrtt = RPC_RTO_MIN;
 }
+EXPORT_SYMBOL_GPL(rpc_update_rtt);
 
 /*
  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
@@ -103,3 +106,4 @@
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(rpc_calc_rto);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c8c2edc..282a9a2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,9 @@
 static void	xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 
+static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(xprt_list);
+
 /*
  * The transport code maintains an estimate on the maximum number of out-
  * standing RPC requests, using a smoothed version of the congestion
@@ -81,6 +84,78 @@
 #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
 
 /**
+ * xprt_register_transport - register a transport implementation
+ * @transport: transport to register
+ *
+ * If a transport implementation is loaded as a kernel module, it can
+ * call this interface to make itself known to the RPC client.
+ *
+ * Returns:
+ * 0:		transport successfully registered
+ * -EEXIST:	transport already registered
+ * -EINVAL:	transport module being unloaded
+ */
+int xprt_register_transport(struct xprt_class *transport)
+{
+	struct xprt_class *t;
+	int result;
+
+	result = -EEXIST;
+	spin_lock(&xprt_list_lock);
+	list_for_each_entry(t, &xprt_list, list) {
+		/* don't register the same transport class twice */
+		if (t->ident == transport->ident)
+			goto out;
+	}
+
+	result = -EINVAL;
+	if (try_module_get(THIS_MODULE)) {
+		list_add_tail(&transport->list, &xprt_list);
+		printk(KERN_INFO "RPC: Registered %s transport module.\n",
+			transport->name);
+		result = 0;
+	}
+
+out:
+	spin_unlock(&xprt_list_lock);
+	return result;
+}
+EXPORT_SYMBOL_GPL(xprt_register_transport);
+
+/**
+ * xprt_unregister_transport - unregister a transport implementation
+ * transport: transport to unregister
+ *
+ * Returns:
+ * 0:		transport successfully unregistered
+ * -ENOENT:	transport never registered
+ */
+int xprt_unregister_transport(struct xprt_class *transport)
+{
+	struct xprt_class *t;
+	int result;
+
+	result = 0;
+	spin_lock(&xprt_list_lock);
+	list_for_each_entry(t, &xprt_list, list) {
+		if (t == transport) {
+			printk(KERN_INFO
+				"RPC: Unregistered %s transport module.\n",
+				transport->name);
+			list_del_init(&transport->list);
+			module_put(THIS_MODULE);
+			goto out;
+		}
+	}
+	result = -ENOENT;
+
+out:
+	spin_unlock(&xprt_list_lock);
+	return result;
+}
+EXPORT_SYMBOL_GPL(xprt_unregister_transport);
+
+/**
  * xprt_reserve_xprt - serialize write access to transports
  * @task: task that is requesting access to the transport
  *
@@ -118,6 +193,7 @@
 		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
 
 static void xprt_clear_locked(struct rpc_xprt *xprt)
 {
@@ -167,6 +243,7 @@
 		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
 
 static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -246,6 +323,7 @@
 		__xprt_lock_write_next(xprt);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_release_xprt);
 
 /**
  * xprt_release_xprt_cong - allow other requests to use a transport
@@ -262,6 +340,7 @@
 		__xprt_lock_write_next_cong(xprt);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
 
 static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -314,6 +393,7 @@
 {
 	__xprt_put_cong(task->tk_xprt, task->tk_rqstp);
 }
+EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
 /**
  * xprt_adjust_cwnd - adjust transport congestion window
@@ -345,6 +425,7 @@
 	xprt->cwnd = cwnd;
 	__xprt_put_cong(xprt, req);
 }
+EXPORT_SYMBOL_GPL(xprt_adjust_cwnd);
 
 /**
  * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
@@ -359,6 +440,7 @@
 	else
 		rpc_wake_up(&xprt->pending);
 }
+EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
 
 /**
  * xprt_wait_for_buffer_space - wait for transport output buffer to clear
@@ -373,6 +455,7 @@
 	task->tk_timeout = req->rq_timeout;
 	rpc_sleep_on(&xprt->pending, task, NULL, NULL);
 }
+EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
 
 /**
  * xprt_write_space - wake the task waiting for transport output buffer space
@@ -393,6 +476,7 @@
 	}
 	spin_unlock_bh(&xprt->transport_lock);
 }
+EXPORT_SYMBOL_GPL(xprt_write_space);
 
 /**
  * xprt_set_retrans_timeout_def - set a request's retransmit timeout
@@ -406,6 +490,7 @@
 {
 	task->tk_timeout = task->tk_rqstp->rq_timeout;
 }
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
 
 /*
  * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
@@ -425,6 +510,7 @@
 	if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
 		task->tk_timeout = max_timeout;
 }
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
 
 static void xprt_reset_majortimeo(struct rpc_rqst *req)
 {
@@ -500,6 +586,7 @@
 	xprt_wake_pending_tasks(xprt, -ENOTCONN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
+EXPORT_SYMBOL_GPL(xprt_disconnect);
 
 static void
 xprt_init_autodisconnect(unsigned long data)
@@ -610,6 +697,7 @@
 	xprt->stat.bad_xids++;
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
 /**
  * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
@@ -629,6 +717,7 @@
 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
 
 /**
  * xprt_complete_rqst - called when reply processing is complete
@@ -653,6 +742,7 @@
 	req->rq_received = req->rq_private_buf.len = copied;
 	rpc_wake_up_task(task);
 }
+EXPORT_SYMBOL_GPL(xprt_complete_rqst);
 
 static void xprt_timer(struct rpc_task *task)
 {
@@ -889,23 +979,25 @@
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
 	struct rpc_xprt	*xprt;
 	struct rpc_rqst	*req;
+	struct xprt_class *t;
 
-	switch (args->proto) {
-	case IPPROTO_UDP:
-		xprt = xs_setup_udp(args);
-		break;
-	case IPPROTO_TCP:
-		xprt = xs_setup_tcp(args);
-		break;
-	default:
-		printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
-				args->proto);
-		return ERR_PTR(-EIO);
+	spin_lock(&xprt_list_lock);
+	list_for_each_entry(t, &xprt_list, list) {
+		if (t->ident == args->ident) {
+			spin_unlock(&xprt_list_lock);
+			goto found;
+		}
 	}
+	spin_unlock(&xprt_list_lock);
+	printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+	return ERR_PTR(-EIO);
+
+found:
+	xprt = t->setup(args);
 	if (IS_ERR(xprt)) {
 		dprintk("RPC:       xprt_create_transport: failed, %ld\n",
 				-PTR_ERR(xprt));
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
new file mode 100644
index 0000000..264f0fe
--- /dev/null
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
+
+xprtrdma-y := transport.o rpc_rdma.o verbs.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
new file mode 100644
index 0000000..12db635
--- /dev/null
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * rpc_rdma.c
+ *
+ * This file contains the guts of the RPC RDMA protocol, and
+ * does marshaling/unmarshaling, etc. It is also where interfacing
+ * to the Linux RPC framework lives.
+ */
+
+#include "xprt_rdma.h"
+
+#include <linux/highmem.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+enum rpcrdma_chunktype {
+	rpcrdma_noch = 0,
+	rpcrdma_readch,
+	rpcrdma_areadch,
+	rpcrdma_writech,
+	rpcrdma_replych
+};
+
+#ifdef RPC_DEBUG
+static const char transfertypes[][12] = {
+	"pure inline",	/* no chunks */
+	" read chunk",	/* some argument via rdma read */
+	"*read chunk",	/* entire request via rdma read */
+	"write chunk",	/* some result via rdma write */
+	"reply chunk"	/* entire reply via rdma write */
+};
+#endif
+
+/*
+ * Chunk assembly from upper layer xdr_buf.
+ *
+ * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
+ * elements. Segments are then coalesced when registered, if possible
+ * within the selected memreg mode.
+ *
+ * Note, this routine is never called if the connection's memory
+ * registration strategy is 0 (bounce buffers).
+ */
+
+static int
+rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
+	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
+{
+	int len, n = 0, p;
+
+	if (pos == 0 && xdrbuf->head[0].iov_len) {
+		seg[n].mr_page = NULL;
+		seg[n].mr_offset = xdrbuf->head[0].iov_base;
+		seg[n].mr_len = xdrbuf->head[0].iov_len;
+		pos += xdrbuf->head[0].iov_len;
+		++n;
+	}
+
+	if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
+		if (n == nsegs)
+			return 0;
+		seg[n].mr_page = xdrbuf->pages[0];
+		seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
+		seg[n].mr_len = min_t(u32,
+			PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
+		len = xdrbuf->page_len - seg[n].mr_len;
+		pos += len;
+		++n;
+		p = 1;
+		while (len > 0) {
+			if (n == nsegs)
+				return 0;
+			seg[n].mr_page = xdrbuf->pages[p];
+			seg[n].mr_offset = NULL;
+			seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
+			len -= seg[n].mr_len;
+			++n;
+			++p;
+		}
+	}
+
+	if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) {
+		if (n == nsegs)
+			return 0;
+		seg[n].mr_page = NULL;
+		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
+		seg[n].mr_len = xdrbuf->tail[0].iov_len;
+		pos += xdrbuf->tail[0].iov_len;
+		++n;
+	}
+
+	if (pos < xdrbuf->len)
+		dprintk("RPC:       %s: marshaled only %d of %d\n",
+				__func__, pos, xdrbuf->len);
+
+	return n;
+}
+
+/*
+ * Create read/write chunk lists, and reply chunks, for RDMA
+ *
+ *   Assume check against THRESHOLD has been done, and chunks are required.
+ *   Assume only encoding one list entry for read|write chunks. The NFSv3
+ *     protocol is simple enough to allow this as it only has a single "bulk
+ *     result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
+ *     RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
+ *
+ * When used for a single reply chunk (which is a special write
+ * chunk used for the entire reply, rather than just the data), it
+ * is used primarily for READDIR and READLINK which would otherwise
+ * be severely size-limited by a small rdma inline read max. The server
+ * response will come back as an RDMA Write, followed by a message
+ * of type RDMA_NOMSG carrying the xid and length. As a result, reply
+ * chunks do not provide data alignment, however they do not require
+ * "fixup" (moving the response to the upper layer buffer) either.
+ *
+ * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
+ *
+ *  Read chunklist (a linked list):
+ *   N elements, position P (same P for all chunks of same arg!):
+ *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
+ *
+ *  Write chunklist (a list of (one) counted array):
+ *   N elements:
+ *    1 - N - HLOO - HLOO - ... - HLOO - 0
+ *
+ *  Reply chunk (a counted array):
+ *   N elements:
+ *    1 - N - HLOO - HLOO - ... - HLOO
+ */
+
+static unsigned int
+rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
+		struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
+{
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
+	int nsegs, nchunks = 0;
+	int pos;
+	struct rpcrdma_mr_seg *seg = req->rl_segments;
+	struct rpcrdma_read_chunk *cur_rchunk = NULL;
+	struct rpcrdma_write_array *warray = NULL;
+	struct rpcrdma_write_chunk *cur_wchunk = NULL;
+	u32 *iptr = headerp->rm_body.rm_chunks;
+
+	if (type == rpcrdma_readch || type == rpcrdma_areadch) {
+		/* a read chunk - server will RDMA Read our memory */
+		cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
+	} else {
+		/* a write or reply chunk - server will RDMA Write our memory */
+		*iptr++ = xdr_zero;	/* encode a NULL read chunk list */
+		if (type == rpcrdma_replych)
+			*iptr++ = xdr_zero;	/* a NULL write chunk list */
+		warray = (struct rpcrdma_write_array *) iptr;
+		cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
+	}
+
+	if (type == rpcrdma_replych || type == rpcrdma_areadch)
+		pos = 0;
+	else
+		pos = target->head[0].iov_len;
+
+	nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
+	if (nsegs == 0)
+		return 0;
+
+	do {
+		/* bind/register the memory, then build chunk from result. */
+		int n = rpcrdma_register_external(seg, nsegs,
+						cur_wchunk != NULL, r_xprt);
+		if (n <= 0)
+			goto out;
+		if (cur_rchunk) {	/* read */
+			cur_rchunk->rc_discrim = xdr_one;
+			/* all read chunks have the same "position" */
+			cur_rchunk->rc_position = htonl(pos);
+			cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
+			cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
+			xdr_encode_hyper(
+					(u32 *)&cur_rchunk->rc_target.rs_offset,
+					seg->mr_base);
+			dprintk("RPC:       %s: read chunk "
+				"elem %d@0x%llx:0x%x pos %d (%s)\n", __func__,
+				seg->mr_len, seg->mr_base, seg->mr_rkey, pos,
+				n < nsegs ? "more" : "last");
+			cur_rchunk++;
+			r_xprt->rx_stats.read_chunk_count++;
+		} else {		/* write/reply */
+			cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
+			cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
+			xdr_encode_hyper(
+					(u32 *)&cur_wchunk->wc_target.rs_offset,
+					seg->mr_base);
+			dprintk("RPC:       %s: %s chunk "
+				"elem %d@0x%llx:0x%x (%s)\n", __func__,
+				(type == rpcrdma_replych) ? "reply" : "write",
+				seg->mr_len, seg->mr_base, seg->mr_rkey,
+				n < nsegs ? "more" : "last");
+			cur_wchunk++;
+			if (type == rpcrdma_replych)
+				r_xprt->rx_stats.reply_chunk_count++;
+			else
+				r_xprt->rx_stats.write_chunk_count++;
+			r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+		}
+		nchunks++;
+		seg   += n;
+		nsegs -= n;
+	} while (nsegs);
+
+	/* success. all failures return above */
+	req->rl_nchunks = nchunks;
+
+	BUG_ON(nchunks == 0);
+
+	/*
+	 * finish off header. If write, marshal discrim and nchunks.
+	 */
+	if (cur_rchunk) {
+		iptr = (u32 *) cur_rchunk;
+		*iptr++ = xdr_zero;	/* finish the read chunk list */
+		*iptr++ = xdr_zero;	/* encode a NULL write chunk list */
+		*iptr++ = xdr_zero;	/* encode a NULL reply chunk */
+	} else {
+		warray->wc_discrim = xdr_one;
+		warray->wc_nchunks = htonl(nchunks);
+		iptr = (u32 *) cur_wchunk;
+		if (type == rpcrdma_writech) {
+			*iptr++ = xdr_zero; /* finish the write chunk list */
+			*iptr++ = xdr_zero; /* encode a NULL reply chunk */
+		}
+	}
+
+	/*
+	 * Return header size.
+	 */
+	return (unsigned char *)iptr - (unsigned char *)headerp;
+
+out:
+	for (pos = 0; nchunks--;)
+		pos += rpcrdma_deregister_external(
+				&req->rl_segments[pos], r_xprt, NULL);
+	return 0;
+}
+
+/*
+ * Copy write data inline.
+ * This function is used for "small" requests. Data which is passed
+ * to RPC via iovecs (or page list) is copied directly into the
+ * pre-registered memory buffer for this request. For small amounts
+ * of data, this is efficient. The cutoff value is tunable.
+ */
+static int
+rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
+{
+	int i, npages, curlen;
+	int copy_len;
+	unsigned char *srcp, *destp;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+
+	destp = rqst->rq_svec[0].iov_base;
+	curlen = rqst->rq_svec[0].iov_len;
+	destp += curlen;
+	/*
+	 * Do optional padding where it makes sense. Alignment of write
+	 * payload can help the server, if our setting is accurate.
+	 */
+	pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
+	if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
+		pad = 0;	/* don't pad this request */
+
+	dprintk("RPC:       %s: pad %d destp 0x%p len %d hdrlen %d\n",
+		__func__, pad, destp, rqst->rq_slen, curlen);
+
+	copy_len = rqst->rq_snd_buf.page_len;
+	r_xprt->rx_stats.pullup_copy_count += copy_len;
+	npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
+	for (i = 0; copy_len && i < npages; i++) {
+		if (i == 0)
+			curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
+		else
+			curlen = PAGE_SIZE;
+		if (curlen > copy_len)
+			curlen = copy_len;
+		dprintk("RPC:       %s: page %d destp 0x%p len %d curlen %d\n",
+			__func__, i, destp, copy_len, curlen);
+		srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
+					KM_SKB_SUNRPC_DATA);
+		if (i == 0)
+			memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
+		else
+			memcpy(destp, srcp, curlen);
+		kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
+		rqst->rq_svec[0].iov_len += curlen;
+		destp += curlen;
+		copy_len -= curlen;
+	}
+	if (rqst->rq_snd_buf.tail[0].iov_len) {
+		curlen = rqst->rq_snd_buf.tail[0].iov_len;
+		if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
+			memcpy(destp,
+				rqst->rq_snd_buf.tail[0].iov_base, curlen);
+			r_xprt->rx_stats.pullup_copy_count += curlen;
+		}
+		dprintk("RPC:       %s: tail destp 0x%p len %d curlen %d\n",
+			__func__, destp, copy_len, curlen);
+		rqst->rq_svec[0].iov_len += curlen;
+	}
+	/* header now contains entire send message */
+	return pad;
+}
+
+/*
+ * Marshal a request: the primary job of this routine is to choose
+ * the transfer modes. See comments below.
+ *
+ * Uses multiple RDMA IOVs for a request:
+ *  [0] -- RPC RDMA header, which uses memory from the *start* of the
+ *         preregistered buffer that already holds the RPC data in
+ *         its middle.
+ *  [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
+ *  [2] -- optional padding.
+ *  [3] -- if padded, header only in [1] and data here.
+ */
+
+int
+rpcrdma_marshal_req(struct rpc_rqst *rqst)
+{
+	struct rpc_xprt *xprt = rqst->rq_task->tk_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	char *base;
+	size_t hdrlen, rpclen, padlen;
+	enum rpcrdma_chunktype rtype, wtype;
+	struct rpcrdma_msg *headerp;
+
+	/*
+	 * rpclen gets amount of data in first buffer, which is the
+	 * pre-registered buffer.
+	 */
+	base = rqst->rq_svec[0].iov_base;
+	rpclen = rqst->rq_svec[0].iov_len;
+
+	/* build RDMA header in private area at front */
+	headerp = (struct rpcrdma_msg *) req->rl_base;
+	/* don't htonl XID, it's already done in request */
+	headerp->rm_xid = rqst->rq_xid;
+	headerp->rm_vers = xdr_one;
+	headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
+	headerp->rm_type = __constant_htonl(RDMA_MSG);
+
+	/*
+	 * Chunks needed for results?
+	 *
+	 * o If the expected result is under the inline threshold, all ops
+	 *   return as inline (but see later).
+	 * o Large non-read ops return as a single reply chunk.
+	 * o Large read ops return data as write chunk(s), header as inline.
+	 *
+	 * Note: the NFS code sending down multiple result segments implies
+	 * the op is one of read, readdir[plus], readlink or NFSv4 getacl.
+	 */
+
+	/*
+	 * This code can handle read chunks, write chunks OR reply
+	 * chunks -- only one type. If the request is too big to fit
+	 * inline, then we will choose read chunks. If the request is
+	 * a READ, then use write chunks to separate the file data
+	 * into pages; otherwise use reply chunks.
+	 */
+	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
+		wtype = rpcrdma_noch;
+	else if (rqst->rq_rcv_buf.page_len == 0)
+		wtype = rpcrdma_replych;
+	else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
+		wtype = rpcrdma_writech;
+	else
+		wtype = rpcrdma_replych;
+
+	/*
+	 * Chunks needed for arguments?
+	 *
+	 * o If the total request is under the inline threshold, all ops
+	 *   are sent as inline.
+	 * o Large non-write ops are sent with the entire message as a
+	 *   single read chunk (protocol 0-position special case).
+	 * o Large write ops transmit data as read chunk(s), header as
+	 *   inline.
+	 *
+	 * Note: the NFS code sending down multiple argument segments
+	 * implies the op is a write.
+	 * TBD check NFSv4 setacl
+	 */
+	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
+		rtype = rpcrdma_noch;
+	else if (rqst->rq_snd_buf.page_len == 0)
+		rtype = rpcrdma_areadch;
+	else
+		rtype = rpcrdma_readch;
+
+	/* The following simplification is not true forever */
+	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+		wtype = rpcrdma_noch;
+	BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
+
+	if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
+	    (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
+		/* forced to "pure inline"? */
+		dprintk("RPC:       %s: too much data (%d/%d) for inline\n",
+			__func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
+		return -1;
+	}
+
+	hdrlen = 28; /*sizeof *headerp;*/
+	padlen = 0;
+
+	/*
+	 * Pull up any extra send data into the preregistered buffer.
+	 * When padding is in use and applies to the transfer, insert
+	 * it and change the message type.
+	 */
+	if (rtype == rpcrdma_noch) {
+
+		padlen = rpcrdma_inline_pullup(rqst,
+						RPCRDMA_INLINE_PAD_VALUE(rqst));
+
+		if (padlen) {
+			headerp->rm_type = __constant_htonl(RDMA_MSGP);
+			headerp->rm_body.rm_padded.rm_align =
+				htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
+			headerp->rm_body.rm_padded.rm_thresh =
+				__constant_htonl(RPCRDMA_INLINE_PAD_THRESH);
+			headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
+			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
+			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
+			hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
+			BUG_ON(wtype != rpcrdma_noch);
+
+		} else {
+			headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
+			headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
+			headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
+			/* new length after pullup */
+			rpclen = rqst->rq_svec[0].iov_len;
+			/*
+			 * Currently we try to not actually use read inline.
+			 * Reply chunks have the desirable property that
+			 * they land, packed, directly in the target buffers
+			 * without headers, so they require no fixup. The
+			 * additional RDMA Write op sends the same amount
+			 * of data, streams on-the-wire and adds no overhead
+			 * on receive. Therefore, we request a reply chunk
+			 * for non-writes wherever feasible and efficient.
+			 */
+			if (wtype == rpcrdma_noch &&
+			    r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+				wtype = rpcrdma_replych;
+		}
+	}
+
+	/*
+	 * Marshal chunks. This routine will return the header length
+	 * consumed by marshaling.
+	 */
+	if (rtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst,
+					&rqst->rq_snd_buf, headerp, rtype);
+		wtype = rtype;	/* simplify dprintk */
+
+	} else if (wtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst,
+					&rqst->rq_rcv_buf, headerp, wtype);
+	}
+
+	if (hdrlen == 0)
+		return -1;
+
+	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
+		"                   headerp 0x%p base 0x%p lkey 0x%x\n",
+		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
+		headerp, base, req->rl_iov.lkey);
+
+	/*
+	 * initialize send_iov's - normally only two: rdma chunk header and
+	 * single preregistered RPC header buffer, but if padding is present,
+	 * then use a preregistered (and zeroed) pad buffer between the RPC
+	 * header and any write data. In all non-rdma cases, any following
+	 * data has been copied into the RPC header buffer.
+	 */
+	req->rl_send_iov[0].addr = req->rl_iov.addr;
+	req->rl_send_iov[0].length = hdrlen;
+	req->rl_send_iov[0].lkey = req->rl_iov.lkey;
+
+	req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
+	req->rl_send_iov[1].length = rpclen;
+	req->rl_send_iov[1].lkey = req->rl_iov.lkey;
+
+	req->rl_niovs = 2;
+
+	if (padlen) {
+		struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+
+		req->rl_send_iov[2].addr = ep->rep_pad.addr;
+		req->rl_send_iov[2].length = padlen;
+		req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
+
+		req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
+		req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
+		req->rl_send_iov[3].lkey = req->rl_iov.lkey;
+
+		req->rl_niovs = 4;
+	}
+
+	return 0;
+}
+
+/*
+ * Chase down a received write or reply chunklist to get length
+ * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
+ */
+static int
+rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
+{
+	unsigned int i, total_len;
+	struct rpcrdma_write_chunk *cur_wchunk;
+
+	i = ntohl(**iptrp);	/* get array count */
+	if (i > max)
+		return -1;
+	cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
+	total_len = 0;
+	while (i--) {
+		struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
+		ifdebug(FACILITY) {
+			u64 off;
+			xdr_decode_hyper((u32 *)&seg->rs_offset, &off);
+			dprintk("RPC:       %s: chunk %d@0x%llx:0x%x\n",
+				__func__,
+				ntohl(seg->rs_length),
+				off,
+				ntohl(seg->rs_handle));
+		}
+		total_len += ntohl(seg->rs_length);
+		++cur_wchunk;
+	}
+	/* check and adjust for properly terminated write chunk */
+	if (wrchunk) {
+		u32 *w = (u32 *) cur_wchunk;
+		if (*w++ != xdr_zero)
+			return -1;
+		cur_wchunk = (struct rpcrdma_write_chunk *) w;
+	}
+	if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
+		return -1;
+
+	*iptrp = (u32 *) cur_wchunk;
+	return total_len;
+}
+
+/*
+ * Scatter inline received data back into provided iov's.
+ */
+static void
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+{
+	int i, npages, curlen, olen;
+	char *destp;
+
+	curlen = rqst->rq_rcv_buf.head[0].iov_len;
+	if (curlen > copy_len) {	/* write chunk header fixup */
+		curlen = copy_len;
+		rqst->rq_rcv_buf.head[0].iov_len = curlen;
+	}
+
+	dprintk("RPC:       %s: srcp 0x%p len %d hdrlen %d\n",
+		__func__, srcp, copy_len, curlen);
+
+	/* Shift pointer for first receive segment only */
+	rqst->rq_rcv_buf.head[0].iov_base = srcp;
+	srcp += curlen;
+	copy_len -= curlen;
+
+	olen = copy_len;
+	i = 0;
+	rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
+	if (copy_len && rqst->rq_rcv_buf.page_len) {
+		npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
+			rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
+		for (; i < npages; i++) {
+			if (i == 0)
+				curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
+			else
+				curlen = PAGE_SIZE;
+			if (curlen > copy_len)
+				curlen = copy_len;
+			dprintk("RPC:       %s: page %d"
+				" srcp 0x%p len %d curlen %d\n",
+				__func__, i, srcp, copy_len, curlen);
+			destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
+						KM_SKB_SUNRPC_DATA);
+			if (i == 0)
+				memcpy(destp + rqst->rq_rcv_buf.page_base,
+						srcp, curlen);
+			else
+				memcpy(destp, srcp, curlen);
+			flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
+			kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
+			srcp += curlen;
+			copy_len -= curlen;
+			if (copy_len == 0)
+				break;
+		}
+		rqst->rq_rcv_buf.page_len = olen - copy_len;
+	} else
+		rqst->rq_rcv_buf.page_len = 0;
+
+	if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
+		curlen = copy_len;
+		if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
+			curlen = rqst->rq_rcv_buf.tail[0].iov_len;
+		if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
+			memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+		dprintk("RPC:       %s: tail srcp 0x%p len %d curlen %d\n",
+			__func__, srcp, copy_len, curlen);
+		rqst->rq_rcv_buf.tail[0].iov_len = curlen;
+		copy_len -= curlen; ++i;
+	} else
+		rqst->rq_rcv_buf.tail[0].iov_len = 0;
+
+	if (copy_len)
+		dprintk("RPC:       %s: %d bytes in"
+			" %d extra segments (%d lost)\n",
+			__func__, olen, i, copy_len);
+
+	/* TBD avoid a warning from call_decode() */
+	rqst->rq_private_buf = rqst->rq_rcv_buf;
+}
+
+/*
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
+ */
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
+{
+	struct rpc_xprt *xprt = ep->rep_xprt;
+
+	spin_lock_bh(&xprt->transport_lock);
+	if (ep->rep_connected > 0) {
+		if (!xprt_test_and_set_connected(xprt))
+			xprt_wake_pending_tasks(xprt, 0);
+	} else {
+		if (xprt_test_and_clear_connected(xprt))
+			xprt_wake_pending_tasks(xprt, ep->rep_connected);
+	}
+	spin_unlock_bh(&xprt->transport_lock);
+}
+
+/*
+ * This function is called when memory window unbind which we are waiting
+ * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ */
+static void
+rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+{
+	wake_up(&rep->rr_unbind);
+}
+
+/*
+ * Called as a tasklet to do req/reply match and complete a request
+ * Errors must result in the RPC task either being awakened, or
+ * allowed to timeout, to discover the errors at that time.
+ */
+void
+rpcrdma_reply_handler(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_msg *headerp;
+	struct rpcrdma_req *req;
+	struct rpc_rqst *rqst;
+	struct rpc_xprt *xprt = rep->rr_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	u32 *iptr;
+	int i, rdmalen, status;
+
+	/* Check status. If bad, signal disconnect and return rep to pool */
+	if (rep->rr_len == ~0U) {
+		rpcrdma_recv_buffer_put(rep);
+		if (r_xprt->rx_ep.rep_connected == 1) {
+			r_xprt->rx_ep.rep_connected = -EIO;
+			rpcrdma_conn_func(&r_xprt->rx_ep);
+		}
+		return;
+	}
+	if (rep->rr_len < 28) {
+		dprintk("RPC:       %s: short/invalid reply\n", __func__);
+		goto repost;
+	}
+	headerp = (struct rpcrdma_msg *) rep->rr_base;
+	if (headerp->rm_vers != xdr_one) {
+		dprintk("RPC:       %s: invalid version %d\n",
+			__func__, ntohl(headerp->rm_vers));
+		goto repost;
+	}
+
+	/* Get XID and try for a match. */
+	spin_lock(&xprt->transport_lock);
+	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+	if (rqst == NULL) {
+		spin_unlock(&xprt->transport_lock);
+		dprintk("RPC:       %s: reply 0x%p failed "
+			"to match any request xid 0x%08x len %d\n",
+			__func__, rep, headerp->rm_xid, rep->rr_len);
+repost:
+		r_xprt->rx_stats.bad_reply_count++;
+		rep->rr_func = rpcrdma_reply_handler;
+		if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+			rpcrdma_recv_buffer_put(rep);
+
+		return;
+	}
+
+	/* get request object */
+	req = rpcr_to_rdmar(rqst);
+
+	dprintk("RPC:       %s: reply 0x%p completes request 0x%p\n"
+		"                   RPC request 0x%p xid 0x%08x\n",
+			__func__, rep, req, rqst, headerp->rm_xid);
+
+	BUG_ON(!req || req->rl_reply);
+
+	/* from here on, the reply is no longer an orphan */
+	req->rl_reply = rep;
+
+	/* check for expected message types */
+	/* The order of some of these tests is important. */
+	switch (headerp->rm_type) {
+	case __constant_htonl(RDMA_MSG):
+		/* never expect read chunks */
+		/* never expect reply chunks (two ways to check) */
+		/* never expect write chunks without having offered RDMA */
+		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
+		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||
+		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&
+		     req->rl_nchunks == 0))
+			goto badheader;
+		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
+			/* count any expected write chunks in read reply */
+			/* start at write chunk array count */
+			iptr = &headerp->rm_body.rm_chunks[2];
+			rdmalen = rpcrdma_count_chunks(rep,
+						req->rl_nchunks, 1, &iptr);
+			/* check for validity, and no reply chunk after */
+			if (rdmalen < 0 || *iptr++ != xdr_zero)
+				goto badheader;
+			rep->rr_len -=
+			    ((unsigned char *)iptr - (unsigned char *)headerp);
+			status = rep->rr_len + rdmalen;
+			r_xprt->rx_stats.total_rdma_reply += rdmalen;
+		} else {
+			/* else ordinary inline */
+			iptr = (u32 *)((unsigned char *)headerp + 28);
+			rep->rr_len -= 28; /*sizeof *headerp;*/
+			status = rep->rr_len;
+		}
+		/* Fix up the rpc results for upper layer */
+		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+		break;
+
+	case __constant_htonl(RDMA_NOMSG):
+		/* never expect read or write chunks, always reply chunks */
+		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
+		    headerp->rm_body.rm_chunks[2] != xdr_one ||
+		    req->rl_nchunks == 0)
+			goto badheader;
+		iptr = (u32 *)((unsigned char *)headerp + 28);
+		rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
+		if (rdmalen < 0)
+			goto badheader;
+		r_xprt->rx_stats.total_rdma_reply += rdmalen;
+		/* Reply chunk buffer already is the reply vector - no fixup. */
+		status = rdmalen;
+		break;
+
+badheader:
+	default:
+		dprintk("%s: invalid rpcrdma reply header (type %d):"
+				" chunks[012] == %d %d %d"
+				" expected chunks <= %d\n",
+				__func__, ntohl(headerp->rm_type),
+				headerp->rm_body.rm_chunks[0],
+				headerp->rm_body.rm_chunks[1],
+				headerp->rm_body.rm_chunks[2],
+				req->rl_nchunks);
+		status = -EIO;
+		r_xprt->rx_stats.bad_reply_count++;
+		break;
+	}
+
+	/* If using mw bind, start the deregister process now. */
+	/* (Note: if mr_free(), cannot perform it here, in tasklet context) */
+	if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
+	case RPCRDMA_MEMWINDOWS:
+		for (i = 0; req->rl_nchunks-- > 1;)
+			i += rpcrdma_deregister_external(
+				&req->rl_segments[i], r_xprt, NULL);
+		/* Optionally wait (not here) for unbinds to complete */
+		rep->rr_func = rpcrdma_unbind_func;
+		(void) rpcrdma_deregister_external(&req->rl_segments[i],
+						   r_xprt, rep);
+		break;
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+		for (i = 0; req->rl_nchunks--;)
+			i += rpcrdma_deregister_external(&req->rl_segments[i],
+							 r_xprt, NULL);
+		break;
+	default:
+		break;
+	}
+
+	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
+			__func__, xprt, rqst, status);
+	xprt_complete_rqst(rqst->rq_task, status);
+	spin_unlock(&xprt->transport_lock);
+}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
new file mode 100644
index 0000000..dc55cc97
--- /dev/null
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * transport.c
+ *
+ * This file contains the top-level implementation of an RPC RDMA
+ * transport.
+ *
+ * Naming convention: functions beginning with xprt_ are part of the
+ * transport switch. All others are RPC RDMA internal.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include "xprt_rdma.h"
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+MODULE_LICENSE("Dual BSD/GPL");
+
+MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
+MODULE_AUTHOR("Network Appliance, Inc.");
+
+/*
+ * tunables
+ */
+
+static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_inline_write_padding;
+#if !RPCRDMA_PERSISTENT_REGISTRATION
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
+#else
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
+#endif
+
+#ifdef RPC_DEBUG
+
+static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
+static unsigned int zero;
+static unsigned int max_padding = PAGE_SIZE;
+static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
+static unsigned int max_memreg = RPCRDMA_LAST - 1;
+
+static struct ctl_table_header *sunrpc_table_header;
+
+static ctl_table xr_tunables_table[] = {
+	{
+		.ctl_name	= CTL_SLOTTABLE_RDMA,
+		.procname	= "rdma_slot_table_entries",
+		.data		= &xprt_rdma_slot_table_entries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_slot_table_size,
+		.extra2		= &max_slot_table_size
+	},
+	{
+		.ctl_name	= CTL_RDMA_MAXINLINEREAD,
+		.procname	= "rdma_max_inline_read",
+		.data		= &xprt_rdma_max_inline_read,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_RDMA_MAXINLINEWRITE,
+		.procname	= "rdma_max_inline_write",
+		.data		= &xprt_rdma_max_inline_write,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_RDMA_WRITEPADDING,
+		.procname	= "rdma_inline_write_padding",
+		.data		= &xprt_rdma_inline_write_padding,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &max_padding,
+	},
+	{
+		.ctl_name	= CTL_RDMA_MEMREG,
+		.procname	= "rdma_memreg_strategy",
+		.data		= &xprt_rdma_memreg_strategy,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_memreg,
+		.extra2		= &max_memreg,
+	},
+	{
+		.ctl_name = 0,
+	},
+};
+
+static ctl_table sunrpc_table[] = {
+	{
+		.ctl_name	= CTL_SUNRPC,
+		.procname	= "sunrpc",
+		.mode		= 0555,
+		.child		= xr_tunables_table
+	},
+	{
+		.ctl_name = 0,
+	},
+};
+
+#endif
+
+static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
+
+static void
+xprt_rdma_format_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in *addr = (struct sockaddr_in *)
+					&rpcx_to_rdmad(xprt).addr;
+	char *buf;
+
+	buf = kzalloc(20, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 8, "%u", ntohs(addr->sin_port));
+	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
+
+	buf = kzalloc(48, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port), "rdma");
+	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+	buf = kzalloc(10, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 10, "%02x%02x%02x%02x",
+			NIPQUAD(addr->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 8, "%4hx", ntohs(addr->sin_port));
+	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+	buf = kzalloc(30, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port) >> 8,
+			ntohs(addr->sin_port) & 0xff);
+	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+	/* netid */
+	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+}
+
+static void
+xprt_rdma_free_addresses(struct rpc_xprt *xprt)
+{
+	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+	kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
+}
+
+static void
+xprt_rdma_connect_worker(struct work_struct *work)
+{
+	struct rpcrdma_xprt *r_xprt =
+		container_of(work, struct rpcrdma_xprt, rdma_connect.work);
+	struct rpc_xprt *xprt = &r_xprt->xprt;
+	int rc = 0;
+
+	if (!xprt->shutdown) {
+		xprt_clear_connected(xprt);
+
+		dprintk("RPC:       %s: %sconnect\n", __func__,
+				r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
+		rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+		if (rc)
+			goto out;
+	}
+	goto out_clear;
+
+out:
+	xprt_wake_pending_tasks(xprt, rc);
+
+out_clear:
+	dprintk("RPC:       %s: exit\n", __func__);
+	xprt_clear_connecting(xprt);
+}
+
+/*
+ * xprt_rdma_destroy
+ *
+ * Destroy the xprt.
+ * Free all memory associated with the object, including its own.
+ * NOTE: none of the *destroy methods free memory for their top-level
+ * objects, even though they may have allocated it (they do free
+ * private memory). It's up to the caller to handle it. In this
+ * case (RDMA transport), all structure memory is inlined with the
+ * struct rpcrdma_xprt.
+ */
+static void
+xprt_rdma_destroy(struct rpc_xprt *xprt)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	int rc;
+
+	dprintk("RPC:       %s: called\n", __func__);
+
+	cancel_delayed_work(&r_xprt->rdma_connect);
+	flush_scheduled_work();
+
+	xprt_clear_connected(xprt);
+
+	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
+	rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	if (rc)
+		dprintk("RPC:       %s: rpcrdma_ep_destroy returned %i\n",
+			__func__, rc);
+	rpcrdma_ia_close(&r_xprt->rx_ia);
+
+	xprt_rdma_free_addresses(xprt);
+
+	kfree(xprt->slot);
+	xprt->slot = NULL;
+	kfree(xprt);
+
+	dprintk("RPC:       %s: returning\n", __func__);
+
+	module_put(THIS_MODULE);
+}
+
+/**
+ * xprt_setup_rdma - Set up transport to use RDMA
+ *
+ * @args: rpc transport arguments
+ */
+static struct rpc_xprt *
+xprt_setup_rdma(struct xprt_create *args)
+{
+	struct rpcrdma_create_data_internal cdata;
+	struct rpc_xprt *xprt;
+	struct rpcrdma_xprt *new_xprt;
+	struct rpcrdma_ep *new_ep;
+	struct sockaddr_in *sin;
+	int rc;
+
+	if (args->addrlen > sizeof(xprt->addr)) {
+		dprintk("RPC:       %s: address too large\n", __func__);
+		return ERR_PTR(-EBADF);
+	}
+
+	xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
+	if (xprt == NULL) {
+		dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
+			__func__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	xprt->max_reqs = xprt_rdma_slot_table_entries;
+	xprt->slot = kcalloc(xprt->max_reqs,
+				sizeof(struct rpc_rqst), GFP_KERNEL);
+	if (xprt->slot == NULL) {
+		kfree(xprt);
+		dprintk("RPC:       %s: couldn't allocate %d slots\n",
+			__func__, xprt->max_reqs);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* 60 second timeout, no retries */
+	xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
+	xprt->bind_timeout = (60U * HZ);
+	xprt->connect_timeout = (60U * HZ);
+	xprt->reestablish_timeout = (5U * HZ);
+	xprt->idle_timeout = (5U * 60 * HZ);
+
+	xprt->resvport = 0;		/* privileged port not needed */
+	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
+	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
+	xprt->ops = &xprt_rdma_procs;
+
+	/*
+	 * Set up RDMA-specific connect data.
+	 */
+
+	/* Put server RDMA address in local cdata */
+	memcpy(&cdata.addr, args->dstaddr, args->addrlen);
+
+	/* Ensure xprt->addr holds valid server TCP (not RDMA)
+	 * address, for any side protocols which peek at it */
+	xprt->prot = IPPROTO_TCP;
+	xprt->addrlen = args->addrlen;
+	memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
+
+	sin = (struct sockaddr_in *)&cdata.addr;
+	if (ntohs(sin->sin_port) != 0)
+		xprt_set_bound(xprt);
+
+	dprintk("RPC:       %s: %u.%u.%u.%u:%u\n", __func__,
+			NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
+
+	/* Set max requests */
+	cdata.max_requests = xprt->max_reqs;
+
+	/* Set some length limits */
+	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
+	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
+
+	cdata.inline_wsize = xprt_rdma_max_inline_write;
+	if (cdata.inline_wsize > cdata.wsize)
+		cdata.inline_wsize = cdata.wsize;
+
+	cdata.inline_rsize = xprt_rdma_max_inline_read;
+	if (cdata.inline_rsize > cdata.rsize)
+		cdata.inline_rsize = cdata.rsize;
+
+	cdata.padding = xprt_rdma_inline_write_padding;
+
+	/*
+	 * Create new transport instance, which includes initialized
+	 *  o ia
+	 *  o endpoint
+	 *  o buffers
+	 */
+
+	new_xprt = rpcx_to_rdmax(xprt);
+
+	rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
+				xprt_rdma_memreg_strategy);
+	if (rc)
+		goto out1;
+
+	/*
+	 * initialize and create ep
+	 */
+	new_xprt->rx_data = cdata;
+	new_ep = &new_xprt->rx_ep;
+	new_ep->rep_remote_addr = cdata.addr;
+
+	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
+				&new_xprt->rx_ia, &new_xprt->rx_data);
+	if (rc)
+		goto out2;
+
+	/*
+	 * Allocate pre-registered send and receive buffers for headers and
+	 * any inline data. Also specify any padding which will be provided
+	 * from a preregistered zero buffer.
+	 */
+	rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
+				&new_xprt->rx_data);
+	if (rc)
+		goto out3;
+
+	/*
+	 * Register a callback for connection events. This is necessary because
+	 * connection loss notification is async. We also catch connection loss
+	 * when reaping receives.
+	 */
+	INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
+	new_ep->rep_func = rpcrdma_conn_func;
+	new_ep->rep_xprt = xprt;
+
+	xprt_rdma_format_addresses(xprt);
+
+	if (!try_module_get(THIS_MODULE))
+		goto out4;
+
+	return xprt;
+
+out4:
+	xprt_rdma_free_addresses(xprt);
+	rc = -EINVAL;
+out3:
+	(void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+out2:
+	rpcrdma_ia_close(&new_xprt->rx_ia);
+out1:
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Close a connection, during shutdown or timeout/reconnect
+ */
+static void
+xprt_rdma_close(struct rpc_xprt *xprt)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	dprintk("RPC:       %s: closing\n", __func__);
+	xprt_disconnect(xprt);
+	(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+}
+
+static void
+xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
+{
+	struct sockaddr_in *sap;
+
+	sap = (struct sockaddr_in *)&xprt->addr;
+	sap->sin_port = htons(port);
+	sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
+	sap->sin_port = htons(port);
+	dprintk("RPC:       %s: %u\n", __func__, port);
+}
+
+static void
+xprt_rdma_connect(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	if (!xprt_test_and_set_connecting(xprt)) {
+		if (r_xprt->rx_ep.rep_connected != 0) {
+			/* Reconnect */
+			schedule_delayed_work(&r_xprt->rdma_connect,
+				xprt->reestablish_timeout);
+		} else {
+			schedule_delayed_work(&r_xprt->rdma_connect, 0);
+			if (!RPC_IS_ASYNC(task))
+				flush_scheduled_work();
+		}
+	}
+}
+
+static int
+xprt_rdma_reserve_xprt(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
+
+	/* == RPC_CWNDSCALE @ init, but *after* setup */
+	if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
+		r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
+		dprintk("RPC:       %s: cwndscale %lu\n", __func__,
+			r_xprt->rx_buf.rb_cwndscale);
+		BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
+	}
+	xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
+	return xprt_reserve_xprt_cong(task);
+}
+
+/*
+ * The RDMA allocate/free functions need the task structure as a place
+ * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
+ * sequence. For this reason, the recv buffers are attached to send
+ * buffers for portions of the RPC. Note that the RPC layer allocates
+ * both send and receive buffers in the same call. We may register
+ * the receive buffer portion when using reply chunks.
+ */
+static void *
+xprt_rdma_allocate(struct rpc_task *task, size_t size)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpcrdma_req *req, *nreq;
+
+	req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
+	BUG_ON(NULL == req);
+
+	if (size > req->rl_size) {
+		dprintk("RPC:       %s: size %zd too large for buffer[%zd]: "
+			"prog %d vers %d proc %d\n",
+			__func__, size, req->rl_size,
+			task->tk_client->cl_prog, task->tk_client->cl_vers,
+			task->tk_msg.rpc_proc->p_proc);
+		/*
+		 * Outgoing length shortage. Our inline write max must have
+		 * been configured to perform direct i/o.
+		 *
+		 * This is therefore a large metadata operation, and the
+		 * allocate call was made on the maximum possible message,
+		 * e.g. containing long filename(s) or symlink data. In
+		 * fact, while these metadata operations *might* carry
+		 * large outgoing payloads, they rarely *do*. However, we
+		 * have to commit to the request here, so reallocate and
+		 * register it now. The data path will never require this
+		 * reallocation.
+		 *
+		 * If the allocation or registration fails, the RPC framework
+		 * will (doggedly) retry.
+		 */
+		if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
+				RPCRDMA_BOUNCEBUFFERS) {
+			/* forced to "pure inline" */
+			dprintk("RPC:       %s: too much data (%zd) for inline "
+					"(r/w max %d/%d)\n", __func__, size,
+					rpcx_to_rdmad(xprt).inline_rsize,
+					rpcx_to_rdmad(xprt).inline_wsize);
+			size = req->rl_size;
+			rpc_exit(task, -EIO);		/* fail the operation */
+			rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+			goto out;
+		}
+		if (task->tk_flags & RPC_TASK_SWAPPER)
+			nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
+		else
+			nreq = kmalloc(sizeof *req + size, GFP_NOFS);
+		if (nreq == NULL)
+			goto outfail;
+
+		if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
+				nreq->rl_base, size + sizeof(struct rpcrdma_req)
+				- offsetof(struct rpcrdma_req, rl_base),
+				&nreq->rl_handle, &nreq->rl_iov)) {
+			kfree(nreq);
+			goto outfail;
+		}
+		rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
+		nreq->rl_size = size;
+		nreq->rl_niovs = 0;
+		nreq->rl_nchunks = 0;
+		nreq->rl_buffer = (struct rpcrdma_buffer *)req;
+		nreq->rl_reply = req->rl_reply;
+		memcpy(nreq->rl_segments,
+			req->rl_segments, sizeof nreq->rl_segments);
+		/* flag the swap with an unused field */
+		nreq->rl_iov.length = 0;
+		req->rl_reply = NULL;
+		req = nreq;
+	}
+	dprintk("RPC:       %s: size %zd, request 0x%p\n", __func__, size, req);
+out:
+	return req->rl_xdr_buf;
+
+outfail:
+	rpcrdma_buffer_put(req);
+	rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+	return NULL;
+}
+
+/*
+ * This function returns all RDMA resources to the pool.
+ */
+static void
+xprt_rdma_free(void *buffer)
+{
+	struct rpcrdma_req *req;
+	struct rpcrdma_xprt *r_xprt;
+	struct rpcrdma_rep *rep;
+	int i;
+
+	if (buffer == NULL)
+		return;
+
+	req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
+	r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
+	rep = req->rl_reply;
+
+	dprintk("RPC:       %s: called on 0x%p%s\n",
+		__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
+
+	/*
+	 * Finish the deregistration. When using mw bind, this was
+	 * begun in rpcrdma_reply_handler(). In all other modes, we
+	 * do it here, in thread context. The process is considered
+	 * complete when the rr_func vector becomes NULL - this
+	 * was put in place during rpcrdma_reply_handler() - the wait
+	 * call below will not block if the dereg is "done". If
+	 * interrupted, our framework will clean up.
+	 */
+	for (i = 0; req->rl_nchunks;) {
+		--req->rl_nchunks;
+		i += rpcrdma_deregister_external(
+			&req->rl_segments[i], r_xprt, NULL);
+	}
+
+	if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
+		rep->rr_func = NULL;	/* abandon the callback */
+		req->rl_reply = NULL;
+	}
+
+	if (req->rl_iov.length == 0) {	/* see allocate above */
+		struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
+		oreq->rl_reply = req->rl_reply;
+		(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
+						   req->rl_handle,
+						   &req->rl_iov);
+		kfree(req);
+		req = oreq;
+	}
+
+	/* Put back request+reply buffers */
+	rpcrdma_buffer_put(req);
+}
+
+/*
+ * send_request invokes the meat of RPC RDMA. It must do the following:
+ *  1.  Marshal the RPC request into an RPC RDMA request, which means
+ *	putting a header in front of data, and creating IOVs for RDMA
+ *	from those in the request.
+ *  2.  In marshaling, detect opportunities for RDMA, and use them.
+ *  3.  Post a recv message to set up asynch completion, then send
+ *	the request (rpcrdma_ep_post).
+ *  4.  No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ */
+
+static int
+xprt_rdma_send_request(struct rpc_task *task)
+{
+	struct rpc_rqst *rqst = task->tk_rqstp;
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	/* marshal the send itself */
+	if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
+		r_xprt->rx_stats.failed_marshal_count++;
+		dprintk("RPC:       %s: rpcrdma_marshal_req failed\n",
+			__func__);
+		return -EIO;
+	}
+
+	if (req->rl_reply == NULL) 		/* e.g. reconnection */
+		rpcrdma_recv_buffer_get(req);
+
+	if (req->rl_reply) {
+		req->rl_reply->rr_func = rpcrdma_reply_handler;
+		/* this need only be done once, but... */
+		req->rl_reply->rr_xprt = xprt;
+	}
+
+	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
+		xprt_disconnect(xprt);
+		return -ENOTCONN;	/* implies disconnect */
+	}
+
+	rqst->rq_bytes_sent = 0;
+	return 0;
+}
+
+static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	long idle_time = 0;
+
+	if (xprt_connected(xprt))
+		idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+	seq_printf(seq,
+	  "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
+	  "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
+
+	   0,	/* need a local port? */
+	   xprt->stat.bind_count,
+	   xprt->stat.connect_count,
+	   xprt->stat.connect_time,
+	   idle_time,
+	   xprt->stat.sends,
+	   xprt->stat.recvs,
+	   xprt->stat.bad_xids,
+	   xprt->stat.req_u,
+	   xprt->stat.bklog_u,
+
+	   r_xprt->rx_stats.read_chunk_count,
+	   r_xprt->rx_stats.write_chunk_count,
+	   r_xprt->rx_stats.reply_chunk_count,
+	   r_xprt->rx_stats.total_rdma_request,
+	   r_xprt->rx_stats.total_rdma_reply,
+	   r_xprt->rx_stats.pullup_copy_count,
+	   r_xprt->rx_stats.fixup_copy_count,
+	   r_xprt->rx_stats.hardway_register_count,
+	   r_xprt->rx_stats.failed_marshal_count,
+	   r_xprt->rx_stats.bad_reply_count);
+}
+
+/*
+ * Plumbing for rpc transport switch and kernel module
+ */
+
+static struct rpc_xprt_ops xprt_rdma_procs = {
+	.reserve_xprt		= xprt_rdma_reserve_xprt,
+	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
+	.release_request	= xprt_release_rqst_cong,       /* ditto */
+	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
+	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
+	.set_port		= xprt_rdma_set_port,
+	.connect		= xprt_rdma_connect,
+	.buf_alloc		= xprt_rdma_allocate,
+	.buf_free		= xprt_rdma_free,
+	.send_request		= xprt_rdma_send_request,
+	.close			= xprt_rdma_close,
+	.destroy		= xprt_rdma_destroy,
+	.print_stats		= xprt_rdma_print_stats
+};
+
+static struct xprt_class xprt_rdma = {
+	.list			= LIST_HEAD_INIT(xprt_rdma.list),
+	.name			= "rdma",
+	.owner			= THIS_MODULE,
+	.ident			= XPRT_TRANSPORT_RDMA,
+	.setup			= xprt_setup_rdma,
+};
+
+static void __exit xprt_rdma_cleanup(void)
+{
+	int rc;
+
+	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+#ifdef RPC_DEBUG
+	if (sunrpc_table_header) {
+		unregister_sysctl_table(sunrpc_table_header);
+		sunrpc_table_header = NULL;
+	}
+#endif
+	rc = xprt_unregister_transport(&xprt_rdma);
+	if (rc)
+		dprintk("RPC:       %s: xprt_unregister returned %i\n",
+			__func__, rc);
+}
+
+static int __init xprt_rdma_init(void)
+{
+	int rc;
+
+	rc = xprt_register_transport(&xprt_rdma);
+
+	if (rc)
+		return rc;
+
+	dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+
+	dprintk(KERN_INFO "Defaults:\n");
+	dprintk(KERN_INFO "\tSlots %d\n"
+		"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
+		xprt_rdma_slot_table_entries,
+		xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
+	dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+		xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+
+#ifdef RPC_DEBUG
+	if (!sunrpc_table_header)
+		sunrpc_table_header = register_sysctl_table(sunrpc_table);
+#endif
+	return 0;
+}
+
+module_init(xprt_rdma_init);
+module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
new file mode 100644
index 0000000..9ec8ca4
--- /dev/null
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -0,0 +1,1626 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * verbs.c
+ *
+ * Encapsulates the major functions managing:
+ *  o adapters
+ *  o endpoints
+ *  o connections
+ *  o buffer memory
+ */
+
+#include <linux/pci.h>	/* for Tavor hack below */
+
+#include "xprt_rdma.h"
+
+/*
+ * Globals/Macros
+ */
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+/*
+ * internal functions
+ */
+
+/*
+ * handle replies in tasklet context, using a single, global list
+ * rdma tasklet function -- just turn around and call the func
+ * for all replies on the list
+ */
+
+static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
+static LIST_HEAD(rpcrdma_tasklets_g);
+
+static void
+rpcrdma_run_tasklet(unsigned long data)
+{
+	struct rpcrdma_rep *rep;
+	void (*func)(struct rpcrdma_rep *);
+	unsigned long flags;
+
+	data = data;
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	while (!list_empty(&rpcrdma_tasklets_g)) {
+		rep = list_entry(rpcrdma_tasklets_g.next,
+				 struct rpcrdma_rep, rr_list);
+		list_del(&rep->rr_list);
+		func = rep->rr_func;
+		rep->rr_func = NULL;
+		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+
+		if (func)
+			func(rep);
+		else
+			rpcrdma_recv_buffer_put(rep);
+
+		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	}
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+}
+
+static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
+
+static inline void
+rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
+static void
+rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+{
+	struct rpcrdma_ep *ep = context;
+
+	dprintk("RPC:       %s: QP error %X on device %s ep %p\n",
+		__func__, event->event, event->device->name, context);
+	if (ep->rep_connected == 1) {
+		ep->rep_connected = -EIO;
+		ep->rep_func(ep);
+		wake_up_all(&ep->rep_connect_wait);
+	}
+}
+
+static void
+rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
+{
+	struct rpcrdma_ep *ep = context;
+
+	dprintk("RPC:       %s: CQ error %X on device %s ep %p\n",
+		__func__, event->event, event->device->name, context);
+	if (ep->rep_connected == 1) {
+		ep->rep_connected = -EIO;
+		ep->rep_func(ep);
+		wake_up_all(&ep->rep_connect_wait);
+	}
+}
+
+static inline
+void rpcrdma_event_process(struct ib_wc *wc)
+{
+	struct rpcrdma_rep *rep =
+			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+
+	dprintk("RPC:       %s: event rep %p status %X opcode %X length %u\n",
+		__func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+	if (!rep) /* send or bind completion that we don't care about */
+		return;
+
+	if (IB_WC_SUCCESS != wc->status) {
+		dprintk("RPC:       %s: %s WC status %X, connection lost\n",
+			__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
+			 wc->status);
+		rep->rr_len = ~0U;
+		rpcrdma_schedule_tasklet(rep);
+		return;
+	}
+
+	switch (wc->opcode) {
+	case IB_WC_RECV:
+		rep->rr_len = wc->byte_len;
+		ib_dma_sync_single_for_cpu(
+			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+		/* Keep (only) the most recent credits, after check validity */
+		if (rep->rr_len >= 16) {
+			struct rpcrdma_msg *p =
+					(struct rpcrdma_msg *) rep->rr_base;
+			unsigned int credits = ntohl(p->rm_credit);
+			if (credits == 0) {
+				dprintk("RPC:       %s: server"
+					" dropped credits to 0!\n", __func__);
+				/* don't deadlock */
+				credits = 1;
+			} else if (credits > rep->rr_buffer->rb_max_requests) {
+				dprintk("RPC:       %s: server"
+					" over-crediting: %d (%d)\n",
+					__func__, credits,
+					rep->rr_buffer->rb_max_requests);
+				credits = rep->rr_buffer->rb_max_requests;
+			}
+			atomic_set(&rep->rr_buffer->rb_credits, credits);
+		}
+		/* fall through */
+	case IB_WC_BIND_MW:
+		rpcrdma_schedule_tasklet(rep);
+		break;
+	default:
+		dprintk("RPC:       %s: unexpected WC event %X\n",
+			__func__, wc->opcode);
+		break;
+	}
+}
+
+static inline int
+rpcrdma_cq_poll(struct ib_cq *cq)
+{
+	struct ib_wc wc;
+	int rc;
+
+	for (;;) {
+		rc = ib_poll_cq(cq, 1, &wc);
+		if (rc < 0) {
+			dprintk("RPC:       %s: ib_poll_cq failed %i\n",
+				__func__, rc);
+			return rc;
+		}
+		if (rc == 0)
+			break;
+
+		rpcrdma_event_process(&wc);
+	}
+
+	return 0;
+}
+
+/*
+ * rpcrdma_cq_event_upcall
+ *
+ * This upcall handles recv, send, bind and unbind events.
+ * It is reentrant but processes single events in order to maintain
+ * ordering of receives to keep server credits.
+ *
+ * It is the responsibility of the scheduled tasklet to return
+ * recv buffers to the pool. NOTE: this affects synchronization of
+ * connection shutdown. That is, the structures required for
+ * the completion of the reply handler must remain intact until
+ * all memory has been reclaimed.
+ *
+ * Note that send events are suppressed and do not result in an upcall.
+ */
+static void
+rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+{
+	int rc;
+
+	rc = rpcrdma_cq_poll(cq);
+	if (rc)
+		return;
+
+	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	if (rc) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed %i\n",
+			__func__, rc);
+		return;
+	}
+
+	rpcrdma_cq_poll(cq);
+}
+
+#ifdef RPC_DEBUG
+static const char * const conn[] = {
+	"address resolved",
+	"address error",
+	"route resolved",
+	"route error",
+	"connect request",
+	"connect response",
+	"connect error",
+	"unreachable",
+	"rejected",
+	"established",
+	"disconnected",
+	"device removal"
+};
+#endif
+
+static int
+rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+	struct rpcrdma_xprt *xprt = id->context;
+	struct rpcrdma_ia *ia = &xprt->rx_ia;
+	struct rpcrdma_ep *ep = &xprt->rx_ep;
+	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+	struct ib_qp_attr attr;
+	struct ib_qp_init_attr iattr;
+	int connstate = 0;
+
+	switch (event->event) {
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		complete(&ia->ri_done);
+		break;
+	case RDMA_CM_EVENT_ADDR_ERROR:
+		ia->ri_async_rc = -EHOSTUNREACH;
+		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
+			__func__, ep);
+		complete(&ia->ri_done);
+		break;
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+		ia->ri_async_rc = -ENETUNREACH;
+		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
+			__func__, ep);
+		complete(&ia->ri_done);
+		break;
+	case RDMA_CM_EVENT_ESTABLISHED:
+		connstate = 1;
+		ib_query_qp(ia->ri_id->qp, &attr,
+			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
+			&iattr);
+		dprintk("RPC:       %s: %d responder resources"
+			" (%d initiator)\n",
+			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
+		goto connected;
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+		connstate = -ENOTCONN;
+		goto connected;
+	case RDMA_CM_EVENT_UNREACHABLE:
+		connstate = -ENETDOWN;
+		goto connected;
+	case RDMA_CM_EVENT_REJECTED:
+		connstate = -ECONNREFUSED;
+		goto connected;
+	case RDMA_CM_EVENT_DISCONNECTED:
+		connstate = -ECONNABORTED;
+		goto connected;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		connstate = -ENODEV;
+connected:
+		dprintk("RPC:       %s: %s: %u.%u.%u.%u:%u"
+			" (ep 0x%p event 0x%x)\n",
+			__func__,
+			(event->event <= 11) ? conn[event->event] :
+						"unknown connection error",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port),
+			ep, event->event);
+		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
+		dprintk("RPC:       %s: %sconnected\n",
+					__func__, connstate > 0 ? "" : "dis");
+		ep->rep_connected = connstate;
+		ep->rep_func(ep);
+		wake_up_all(&ep->rep_connect_wait);
+		break;
+	default:
+		ia->ri_async_rc = -EINVAL;
+		dprintk("RPC:       %s: unexpected CM event %X\n",
+			__func__, event->event);
+		complete(&ia->ri_done);
+		break;
+	}
+
+	return 0;
+}
+
+static struct rdma_cm_id *
+rpcrdma_create_id(struct rpcrdma_xprt *xprt,
+			struct rpcrdma_ia *ia, struct sockaddr *addr)
+{
+	struct rdma_cm_id *id;
+	int rc;
+
+	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
+	if (IS_ERR(id)) {
+		rc = PTR_ERR(id);
+		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
+			__func__, rc);
+		return id;
+	}
+
+	ia->ri_async_rc = 0;
+	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
+			__func__, rc);
+		goto out;
+	}
+	wait_for_completion(&ia->ri_done);
+	rc = ia->ri_async_rc;
+	if (rc)
+		goto out;
+
+	ia->ri_async_rc = 0;
+	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
+			__func__, rc);
+		goto out;
+	}
+	wait_for_completion(&ia->ri_done);
+	rc = ia->ri_async_rc;
+	if (rc)
+		goto out;
+
+	return id;
+
+out:
+	rdma_destroy_id(id);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Drain any cq, prior to teardown.
+ */
+static void
+rpcrdma_clean_cq(struct ib_cq *cq)
+{
+	struct ib_wc wc;
+	int count = 0;
+
+	while (1 == ib_poll_cq(cq, 1, &wc))
+		++count;
+
+	if (count)
+		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
+			__func__, count, wc.opcode);
+}
+
+/*
+ * Exported functions.
+ */
+
+/*
+ * Open and initialize an Interface Adapter.
+ *  o initializes fields of struct rpcrdma_ia, including
+ *    interface and provider attributes and protection zone.
+ */
+int
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+{
+	int rc;
+	struct rpcrdma_ia *ia = &xprt->rx_ia;
+
+	init_completion(&ia->ri_done);
+
+	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+	if (IS_ERR(ia->ri_id)) {
+		rc = PTR_ERR(ia->ri_id);
+		goto out1;
+	}
+
+	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+	if (IS_ERR(ia->ri_pd)) {
+		rc = PTR_ERR(ia->ri_pd);
+		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
+			__func__, rc);
+		goto out2;
+	}
+
+	/*
+	 * Optionally obtain an underlying physical identity mapping in
+	 * order to do a memory window-based bind. This base registration
+	 * is protected from remote access - that is enabled only by binding
+	 * for the specific bytes targeted during each RPC operation, and
+	 * revoked after the corresponding completion similar to a storage
+	 * adapter.
+	 */
+	if (memreg > RPCRDMA_REGISTER) {
+		int mem_priv = IB_ACCESS_LOCAL_WRITE;
+		switch (memreg) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+		case RPCRDMA_ALLPHYSICAL:
+			mem_priv |= IB_ACCESS_REMOTE_WRITE;
+			mem_priv |= IB_ACCESS_REMOTE_READ;
+			break;
+#endif
+		case RPCRDMA_MEMWINDOWS_ASYNC:
+		case RPCRDMA_MEMWINDOWS:
+			mem_priv |= IB_ACCESS_MW_BIND;
+			break;
+		default:
+			break;
+		}
+		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
+		if (IS_ERR(ia->ri_bind_mem)) {
+			printk(KERN_ALERT "%s: ib_get_dma_mr for "
+				"phys register failed with %lX\n\t"
+				"Will continue with degraded performance\n",
+				__func__, PTR_ERR(ia->ri_bind_mem));
+			memreg = RPCRDMA_REGISTER;
+			ia->ri_bind_mem = NULL;
+		}
+	}
+
+	/* Else will do memory reg/dereg for each chunk */
+	ia->ri_memreg_strategy = memreg;
+
+	return 0;
+out2:
+	rdma_destroy_id(ia->ri_id);
+out1:
+	return rc;
+}
+
+/*
+ * Clean up/close an IA.
+ *   o if event handles and PD have been initialized, free them.
+ *   o close the IA
+ */
+void
+rpcrdma_ia_close(struct rpcrdma_ia *ia)
+{
+	int rc;
+
+	dprintk("RPC:       %s: entering\n", __func__);
+	if (ia->ri_bind_mem != NULL) {
+		rc = ib_dereg_mr(ia->ri_bind_mem);
+		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
+			__func__, rc);
+	}
+	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
+		rdma_destroy_qp(ia->ri_id);
+	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
+		rc = ib_dealloc_pd(ia->ri_pd);
+		dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
+			__func__, rc);
+	}
+	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
+		rdma_destroy_id(ia->ri_id);
+}
+
+/*
+ * Create unconnected endpoint.
+ */
+int
+rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
+				struct rpcrdma_create_data_internal *cdata)
+{
+	struct ib_device_attr devattr;
+	int rc;
+
+	rc = ib_query_device(ia->ri_id->device, &devattr);
+	if (rc) {
+		dprintk("RPC:       %s: ib_query_device failed %d\n",
+			__func__, rc);
+		return rc;
+	}
+
+	/* check provider's send/recv wr limits */
+	if (cdata->max_requests > devattr.max_qp_wr)
+		cdata->max_requests = devattr.max_qp_wr;
+
+	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+	ep->rep_attr.qp_context = ep;
+	/* send_cq and recv_cq initialized below */
+	ep->rep_attr.srq = NULL;
+	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		/* Add room for mw_binds+unbinds - overkill! */
+		ep->rep_attr.cap.max_send_wr++;
+		ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
+		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
+	ep->rep_attr.cap.max_recv_sge = 1;
+	ep->rep_attr.cap.max_inline_data = 0;
+	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+	ep->rep_attr.qp_type = IB_QPT_RC;
+	ep->rep_attr.port_num = ~0;
+
+	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
+		"iovs: send %d recv %d\n",
+		__func__,
+		ep->rep_attr.cap.max_send_wr,
+		ep->rep_attr.cap.max_recv_wr,
+		ep->rep_attr.cap.max_send_sge,
+		ep->rep_attr.cap.max_recv_sge);
+
+	/* set trigger for requesting send completion */
+	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /*  - 1*/;
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
+		break;
+	default:
+		break;
+	}
+	if (ep->rep_cqinit <= 2)
+		ep->rep_cqinit = 0;
+	INIT_CQCOUNT(ep);
+	ep->rep_ia = ia;
+	init_waitqueue_head(&ep->rep_connect_wait);
+
+	/*
+	 * Create a single cq for receive dto and mw_bind (only ever
+	 * care about unbind, really). Send completions are suppressed.
+	 * Use single threaded tasklet upcalls to maintain ordering.
+	 */
+	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
+				  rpcrdma_cq_async_error_upcall, NULL,
+				  ep->rep_attr.cap.max_recv_wr +
+				  ep->rep_attr.cap.max_send_wr + 1, 0);
+	if (IS_ERR(ep->rep_cq)) {
+		rc = PTR_ERR(ep->rep_cq);
+		dprintk("RPC:       %s: ib_create_cq failed: %i\n",
+			__func__, rc);
+		goto out1;
+	}
+
+	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+	if (rc) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
+			__func__, rc);
+		goto out2;
+	}
+
+	ep->rep_attr.send_cq = ep->rep_cq;
+	ep->rep_attr.recv_cq = ep->rep_cq;
+
+	/* Initialize cma parameters */
+
+	/* RPC/RDMA does not use private data */
+	ep->rep_remote_cma.private_data = NULL;
+	ep->rep_remote_cma.private_data_len = 0;
+
+	/* Client offers RDMA Read but does not initiate */
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_BOUNCEBUFFERS:
+		ep->rep_remote_cma.responder_resources = 0;
+		break;
+	case RPCRDMA_MTHCAFMR:
+	case RPCRDMA_REGISTER:
+		ep->rep_remote_cma.responder_resources = cdata->max_requests *
+				(RPCRDMA_MAX_DATA_SEGS / 8);
+		break;
+	case RPCRDMA_MEMWINDOWS:
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+#if RPCRDMA_PERSISTENT_REGISTRATION
+	case RPCRDMA_ALLPHYSICAL:
+#endif
+		ep->rep_remote_cma.responder_resources = cdata->max_requests *
+				(RPCRDMA_MAX_DATA_SEGS / 2);
+		break;
+	default:
+		break;
+	}
+	if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
+	ep->rep_remote_cma.initiator_depth = 0;
+
+	ep->rep_remote_cma.retry_count = 7;
+	ep->rep_remote_cma.flow_control = 0;
+	ep->rep_remote_cma.rnr_retry_count = 0;
+
+	return 0;
+
+out2:
+	if (ib_destroy_cq(ep->rep_cq))
+		;
+out1:
+	return rc;
+}
+
+/*
+ * rpcrdma_ep_destroy
+ *
+ * Disconnect and destroy endpoint. After this, the only
+ * valid operations on the ep are to free it (if dynamically
+ * allocated) or re-create it.
+ *
+ * The caller's error handling must be sure to not leak the endpoint
+ * if this function fails.
+ */
+int
+rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	int rc;
+
+	dprintk("RPC:       %s: entering, connected is %d\n",
+		__func__, ep->rep_connected);
+
+	if (ia->ri_id->qp) {
+		rc = rpcrdma_ep_disconnect(ep, ia);
+		if (rc)
+			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
+				" returned %i\n", __func__, rc);
+	}
+
+	ep->rep_func = NULL;
+
+	/* padding - could be done in rpcrdma_buffer_destroy... */
+	if (ep->rep_pad_mr) {
+		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
+		ep->rep_pad_mr = NULL;
+	}
+
+	if (ia->ri_id->qp) {
+		rdma_destroy_qp(ia->ri_id);
+		ia->ri_id->qp = NULL;
+	}
+
+	rpcrdma_clean_cq(ep->rep_cq);
+	rc = ib_destroy_cq(ep->rep_cq);
+	if (rc)
+		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
+			__func__, rc);
+
+	return rc;
+}
+
+/*
+ * Connect unconnected endpoint.
+ */
+int
+rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	struct rdma_cm_id *id;
+	int rc = 0;
+	int retry_count = 0;
+	int reconnect = (ep->rep_connected != 0);
+
+	if (reconnect) {
+		struct rpcrdma_xprt *xprt;
+retry:
+		rc = rpcrdma_ep_disconnect(ep, ia);
+		if (rc && rc != -ENOTCONN)
+			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
+				" status %i\n", __func__, rc);
+		rpcrdma_clean_cq(ep->rep_cq);
+
+		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+		id = rpcrdma_create_id(xprt, ia,
+				(struct sockaddr *)&xprt->rx_data.addr);
+		if (IS_ERR(id)) {
+			rc = PTR_ERR(id);
+			goto out;
+		}
+		/* TEMP TEMP TEMP - fail if new device:
+		 * Deregister/remarshal *all* requests!
+		 * Close and recreate adapter, pd, etc!
+		 * Re-determine all attributes still sane!
+		 * More stuff I haven't thought of!
+		 * Rrrgh!
+		 */
+		if (ia->ri_id->device != id->device) {
+			printk("RPC:       %s: can't reconnect on "
+				"different device!\n", __func__);
+			rdma_destroy_id(id);
+			rc = -ENETDOWN;
+			goto out;
+		}
+		/* END TEMP */
+		rdma_destroy_id(ia->ri_id);
+		ia->ri_id = id;
+	}
+
+	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_create_qp failed %i\n",
+			__func__, rc);
+		goto out;
+	}
+
+/* XXX Tavor device performs badly with 2K MTU! */
+if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
+	struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
+	if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
+	    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
+	     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
+		struct ib_qp_attr attr = {
+			.path_mtu = IB_MTU_1024
+		};
+		rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
+	}
+}
+
+	/* Theoretically a client initiator_depth > 0 is not needed,
+	 * but many peers fail to complete the connection unless they
+	 * == responder_resources! */
+	if (ep->rep_remote_cma.initiator_depth !=
+				ep->rep_remote_cma.responder_resources)
+		ep->rep_remote_cma.initiator_depth =
+			ep->rep_remote_cma.responder_resources;
+
+	ep->rep_connected = 0;
+
+	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
+				__func__, rc);
+		goto out;
+	}
+
+	if (reconnect)
+		return 0;
+
+	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
+
+	/*
+	 * Check state. A non-peer reject indicates no listener
+	 * (ECONNREFUSED), which may be a transient state. All
+	 * others indicate a transport condition which has already
+	 * undergone a best-effort.
+	 */
+	if (ep->rep_connected == -ECONNREFUSED
+	    && ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
+		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
+		goto retry;
+	}
+	if (ep->rep_connected <= 0) {
+		/* Sometimes, the only way to reliably connect to remote
+		 * CMs is to use same nonzero values for ORD and IRD. */
+		ep->rep_remote_cma.initiator_depth =
+					ep->rep_remote_cma.responder_resources;
+		if (ep->rep_remote_cma.initiator_depth == 0)
+			++ep->rep_remote_cma.initiator_depth;
+		if (ep->rep_remote_cma.responder_resources == 0)
+			++ep->rep_remote_cma.responder_resources;
+		if (retry_count++ == 0)
+			goto retry;
+		rc = ep->rep_connected;
+	} else {
+		dprintk("RPC:       %s: connected\n", __func__);
+	}
+
+out:
+	if (rc)
+		ep->rep_connected = rc;
+	return rc;
+}
+
+/*
+ * rpcrdma_ep_disconnect
+ *
+ * This is separate from destroy to facilitate the ability
+ * to reconnect without recreating the endpoint.
+ *
+ * This call is not reentrant, and must not be made in parallel
+ * on the same endpoint.
+ */
+int
+rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	int rc;
+
+	rpcrdma_clean_cq(ep->rep_cq);
+	rc = rdma_disconnect(ia->ri_id);
+	if (!rc) {
+		/* returns without wait if not connected */
+		wait_event_interruptible(ep->rep_connect_wait,
+							ep->rep_connected != 1);
+		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
+			(ep->rep_connected == 1) ? "still " : "dis");
+	} else {
+		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
+		ep->rep_connected = rc;
+	}
+	return rc;
+}
+
+/*
+ * Initialize buffer memory
+ */
+int
+rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
+	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
+{
+	char *p;
+	size_t len;
+	int i, rc;
+
+	buf->rb_max_requests = cdata->max_requests;
+	spin_lock_init(&buf->rb_lock);
+	atomic_set(&buf->rb_credits, 1);
+
+	/* Need to allocate:
+	 *   1.  arrays for send and recv pointers
+	 *   2.  arrays of struct rpcrdma_req to fill in pointers
+	 *   3.  array of struct rpcrdma_rep for replies
+	 *   4.  padding, if any
+	 *   5.  mw's, if any
+	 * Send/recv buffers in req/rep need to be registered
+	 */
+
+	len = buf->rb_max_requests *
+		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
+	len += cdata->padding;
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MTHCAFMR:
+		/* TBD we are perhaps overallocating here */
+		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+				sizeof(struct rpcrdma_mw);
+		break;
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+				sizeof(struct rpcrdma_mw);
+		break;
+	default:
+		break;
+	}
+
+	/* allocate 1, 4 and 5 in one shot */
+	p = kzalloc(len, GFP_KERNEL);
+	if (p == NULL) {
+		dprintk("RPC:       %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
+			__func__, len);
+		rc = -ENOMEM;
+		goto out;
+	}
+	buf->rb_pool = p;	/* for freeing it later */
+
+	buf->rb_send_bufs = (struct rpcrdma_req **) p;
+	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
+	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
+	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
+
+	/*
+	 * Register the zeroed pad buffer, if any.
+	 */
+	if (cdata->padding) {
+		rc = rpcrdma_register_internal(ia, p, cdata->padding,
+					    &ep->rep_pad_mr, &ep->rep_pad);
+		if (rc)
+			goto out;
+	}
+	p += cdata->padding;
+
+	/*
+	 * Allocate the fmr's, or mw's for mw_bind chunk registration.
+	 * We "cycle" the mw's in order to minimize rkey reuse,
+	 * and also reduce unbind-to-bind collision.
+	 */
+	INIT_LIST_HEAD(&buf->rb_mws);
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MTHCAFMR:
+		{
+		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+		struct ib_fmr_attr fa = {
+			RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
+		};
+		/* TBD we are perhaps overallocating here */
+		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
+				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
+				&fa);
+			if (IS_ERR(r->r.fmr)) {
+				rc = PTR_ERR(r->r.fmr);
+				dprintk("RPC:       %s: ib_alloc_fmr"
+					" failed %i\n", __func__, rc);
+				goto out;
+			}
+			list_add(&r->mw_list, &buf->rb_mws);
+			++r;
+		}
+		}
+		break;
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		{
+		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+		/* Allocate one extra request's worth, for full cycling */
+		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+			r->r.mw = ib_alloc_mw(ia->ri_pd);
+			if (IS_ERR(r->r.mw)) {
+				rc = PTR_ERR(r->r.mw);
+				dprintk("RPC:       %s: ib_alloc_mw"
+					" failed %i\n", __func__, rc);
+				goto out;
+			}
+			list_add(&r->mw_list, &buf->rb_mws);
+			++r;
+		}
+		}
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Allocate/init the request/reply buffers. Doing this
+	 * using kmalloc for now -- one for each buf.
+	 */
+	for (i = 0; i < buf->rb_max_requests; i++) {
+		struct rpcrdma_req *req;
+		struct rpcrdma_rep *rep;
+
+		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
+		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
+		/* Typical ~2400b, so rounding up saves work later */
+		if (len < 4096)
+			len = 4096;
+		req = kmalloc(len, GFP_KERNEL);
+		if (req == NULL) {
+			dprintk("RPC:       %s: request buffer %d alloc"
+				" failed\n", __func__, i);
+			rc = -ENOMEM;
+			goto out;
+		}
+		memset(req, 0, sizeof(struct rpcrdma_req));
+		buf->rb_send_bufs[i] = req;
+		buf->rb_send_bufs[i]->rl_buffer = buf;
+
+		rc = rpcrdma_register_internal(ia, req->rl_base,
+				len - offsetof(struct rpcrdma_req, rl_base),
+				&buf->rb_send_bufs[i]->rl_handle,
+				&buf->rb_send_bufs[i]->rl_iov);
+		if (rc)
+			goto out;
+
+		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+
+		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
+		rep = kmalloc(len, GFP_KERNEL);
+		if (rep == NULL) {
+			dprintk("RPC:       %s: reply buffer %d alloc failed\n",
+				__func__, i);
+			rc = -ENOMEM;
+			goto out;
+		}
+		memset(rep, 0, sizeof(struct rpcrdma_rep));
+		buf->rb_recv_bufs[i] = rep;
+		buf->rb_recv_bufs[i]->rr_buffer = buf;
+		init_waitqueue_head(&rep->rr_unbind);
+
+		rc = rpcrdma_register_internal(ia, rep->rr_base,
+				len - offsetof(struct rpcrdma_rep, rr_base),
+				&buf->rb_recv_bufs[i]->rr_handle,
+				&buf->rb_recv_bufs[i]->rr_iov);
+		if (rc)
+			goto out;
+
+	}
+	dprintk("RPC:       %s: max_requests %d\n",
+		__func__, buf->rb_max_requests);
+	/* done */
+	return 0;
+out:
+	rpcrdma_buffer_destroy(buf);
+	return rc;
+}
+
+/*
+ * Unregister and destroy buffer memory. Need to deal with
+ * partial initialization, so it's callable from failed create.
+ * Must be called before destroying endpoint, as registrations
+ * reference it.
+ */
+void
+rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
+{
+	int rc, i;
+	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+
+	/* clean up in reverse order from create
+	 *   1.  recv mr memory (mr free, then kfree)
+	 *   1a. bind mw memory
+	 *   2.  send mr memory (mr free, then kfree)
+	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
+	 *   4.  arrays
+	 */
+	dprintk("RPC:       %s: entering\n", __func__);
+
+	for (i = 0; i < buf->rb_max_requests; i++) {
+		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
+			rpcrdma_deregister_internal(ia,
+					buf->rb_recv_bufs[i]->rr_handle,
+					&buf->rb_recv_bufs[i]->rr_iov);
+			kfree(buf->rb_recv_bufs[i]);
+		}
+		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
+			while (!list_empty(&buf->rb_mws)) {
+				struct rpcrdma_mw *r;
+				r = list_entry(buf->rb_mws.next,
+					struct rpcrdma_mw, mw_list);
+				list_del(&r->mw_list);
+				switch (ia->ri_memreg_strategy) {
+				case RPCRDMA_MTHCAFMR:
+					rc = ib_dealloc_fmr(r->r.fmr);
+					if (rc)
+						dprintk("RPC:       %s:"
+							" ib_dealloc_fmr"
+							" failed %i\n",
+							__func__, rc);
+					break;
+				case RPCRDMA_MEMWINDOWS_ASYNC:
+				case RPCRDMA_MEMWINDOWS:
+					rc = ib_dealloc_mw(r->r.mw);
+					if (rc)
+						dprintk("RPC:       %s:"
+							" ib_dealloc_mw"
+							" failed %i\n",
+							__func__, rc);
+					break;
+				default:
+					break;
+				}
+			}
+			rpcrdma_deregister_internal(ia,
+					buf->rb_send_bufs[i]->rl_handle,
+					&buf->rb_send_bufs[i]->rl_iov);
+			kfree(buf->rb_send_bufs[i]);
+		}
+	}
+
+	kfree(buf->rb_pool);
+}
+
+/*
+ * Get a set of request/reply buffers.
+ *
+ * Reply buffer (if needed) is attached to send buffer upon return.
+ * Rule:
+ *    rb_send_index and rb_recv_index MUST always be pointing to the
+ *    *next* available buffer (non-NULL). They are incremented after
+ *    removing buffers, and decremented *before* returning them.
+ */
+struct rpcrdma_req *
+rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
+{
+	struct rpcrdma_req *req;
+	unsigned long flags;
+
+	spin_lock_irqsave(&buffers->rb_lock, flags);
+	if (buffers->rb_send_index == buffers->rb_max_requests) {
+		spin_unlock_irqrestore(&buffers->rb_lock, flags);
+		dprintk("RPC:       %s: out of request buffers\n", __func__);
+		return ((struct rpcrdma_req *)NULL);
+	}
+
+	req = buffers->rb_send_bufs[buffers->rb_send_index];
+	if (buffers->rb_send_index < buffers->rb_recv_index) {
+		dprintk("RPC:       %s: %d extra receives outstanding (ok)\n",
+			__func__,
+			buffers->rb_recv_index - buffers->rb_send_index);
+		req->rl_reply = NULL;
+	} else {
+		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+	}
+	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
+	if (!list_empty(&buffers->rb_mws)) {
+		int i = RPCRDMA_MAX_SEGS - 1;
+		do {
+			struct rpcrdma_mw *r;
+			r = list_entry(buffers->rb_mws.next,
+					struct rpcrdma_mw, mw_list);
+			list_del(&r->mw_list);
+			req->rl_segments[i].mr_chunk.rl_mw = r;
+		} while (--i >= 0);
+	}
+	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+	return req;
+}
+
+/*
+ * Put request/reply buffers back into pool.
+ * Pre-decrement counter/array index.
+ */
+void
+rpcrdma_buffer_put(struct rpcrdma_req *req)
+{
+	struct rpcrdma_buffer *buffers = req->rl_buffer;
+	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
+	int i;
+	unsigned long flags;
+
+	BUG_ON(req->rl_nchunks != 0);
+	spin_lock_irqsave(&buffers->rb_lock, flags);
+	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
+	req->rl_niovs = 0;
+	if (req->rl_reply) {
+		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
+		init_waitqueue_head(&req->rl_reply->rr_unbind);
+		req->rl_reply->rr_func = NULL;
+		req->rl_reply = NULL;
+	}
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MTHCAFMR:
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		/*
+		 * Cycle mw's back in reverse order, and "spin" them.
+		 * This delays and scrambles reuse as much as possible.
+		 */
+		i = 1;
+		do {
+			struct rpcrdma_mw **mw;
+			mw = &req->rl_segments[i].mr_chunk.rl_mw;
+			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
+			*mw = NULL;
+		} while (++i < RPCRDMA_MAX_SEGS);
+		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
+					&buffers->rb_mws);
+		req->rl_segments[0].mr_chunk.rl_mw = NULL;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Recover reply buffers from pool.
+ * This happens when recovering from error conditions.
+ * Post-increment counter/array index.
+ */
+void
+rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
+{
+	struct rpcrdma_buffer *buffers = req->rl_buffer;
+	unsigned long flags;
+
+	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
+		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
+	spin_lock_irqsave(&buffers->rb_lock, flags);
+	if (buffers->rb_recv_index < buffers->rb_max_requests) {
+		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+	}
+	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Put reply buffers back into pool when not attached to
+ * request. This happens in error conditions, and when
+ * aborting unbinds. Pre-decrement counter/array index.
+ */
+void
+rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_buffer *buffers = rep->rr_buffer;
+	unsigned long flags;
+
+	rep->rr_func = NULL;
+	spin_lock_irqsave(&buffers->rb_lock, flags);
+	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
+	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Wrappers for internal-use kmalloc memory registration, used by buffer code.
+ */
+
+int
+rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
+				struct ib_mr **mrp, struct ib_sge *iov)
+{
+	struct ib_phys_buf ipb;
+	struct ib_mr *mr;
+	int rc;
+
+	/*
+	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
+	 */
+	iov->addr = ib_dma_map_single(ia->ri_id->device,
+			va, len, DMA_BIDIRECTIONAL);
+	iov->length = len;
+
+	if (ia->ri_bind_mem != NULL) {
+		*mrp = NULL;
+		iov->lkey = ia->ri_bind_mem->lkey;
+		return 0;
+	}
+
+	ipb.addr = iov->addr;
+	ipb.size = iov->length;
+	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
+			IB_ACCESS_LOCAL_WRITE, &iov->addr);
+
+	dprintk("RPC:       %s: phys convert: 0x%llx "
+			"registered 0x%llx length %d\n",
+			__func__, ipb.addr, iov->addr, len);
+
+	if (IS_ERR(mr)) {
+		*mrp = NULL;
+		rc = PTR_ERR(mr);
+		dprintk("RPC:       %s: failed with %i\n", __func__, rc);
+	} else {
+		*mrp = mr;
+		iov->lkey = mr->lkey;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+int
+rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
+				struct ib_mr *mr, struct ib_sge *iov)
+{
+	int rc;
+
+	ib_dma_unmap_single(ia->ri_id->device,
+			iov->addr, iov->length, DMA_BIDIRECTIONAL);
+
+	if (NULL == mr)
+		return 0;
+
+	rc = ib_dereg_mr(mr);
+	if (rc)
+		dprintk("RPC:       %s: ib_dereg_mr failed %i\n", __func__, rc);
+	return rc;
+}
+
+/*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+static void
+rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
+{
+	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	seg->mr_dmalen = seg->mr_len;
+	if (seg->mr_page)
+		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
+				seg->mr_page, offset_in_page(seg->mr_offset),
+				seg->mr_dmalen, seg->mr_dir);
+	else
+		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
+				seg->mr_offset,
+				seg->mr_dmalen, seg->mr_dir);
+}
+
+static void
+rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
+{
+	if (seg->mr_page)
+		ib_dma_unmap_page(ia->ri_id->device,
+				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+	else
+		ib_dma_unmap_single(ia->ri_id->device,
+				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+}
+
+int
+rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
+			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+				  IB_ACCESS_REMOTE_READ);
+	struct rpcrdma_mr_seg *seg1 = seg;
+	int i;
+	int rc = 0;
+
+	switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+	case RPCRDMA_ALLPHYSICAL:
+		rpcrdma_map_one(ia, seg, writing);
+		seg->mr_rkey = ia->ri_bind_mem->rkey;
+		seg->mr_base = seg->mr_dma;
+		seg->mr_nsegs = 1;
+		nsegs = 1;
+		break;
+#endif
+
+	/* Registration using fast memory registration */
+	case RPCRDMA_MTHCAFMR:
+		{
+		u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+		int len, pageoff = offset_in_page(seg->mr_offset);
+		seg1->mr_offset -= pageoff;	/* start of page */
+		seg1->mr_len += pageoff;
+		len = -pageoff;
+		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+			nsegs = RPCRDMA_MAX_DATA_SEGS;
+		for (i = 0; i < nsegs;) {
+			rpcrdma_map_one(ia, seg, writing);
+			physaddrs[i] = seg->mr_dma;
+			len += seg->mr_len;
+			++seg;
+			++i;
+			/* Check for holes */
+			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+				break;
+		}
+		nsegs = i;
+		rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+					physaddrs, nsegs, seg1->mr_dma);
+		if (rc) {
+			dprintk("RPC:       %s: failed ib_map_phys_fmr "
+				"%u@0x%llx+%i (%d)... status %i\n", __func__,
+				len, (unsigned long long)seg1->mr_dma,
+				pageoff, nsegs, rc);
+			while (nsegs--)
+				rpcrdma_unmap_one(ia, --seg);
+		} else {
+			seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+			seg1->mr_base = seg1->mr_dma + pageoff;
+			seg1->mr_nsegs = nsegs;
+			seg1->mr_len = len;
+		}
+		}
+		break;
+
+	/* Registration using memory windows */
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		{
+		struct ib_mw_bind param;
+		rpcrdma_map_one(ia, seg, writing);
+		param.mr = ia->ri_bind_mem;
+		param.wr_id = 0ULL;	/* no send cookie */
+		param.addr = seg->mr_dma;
+		param.length = seg->mr_len;
+		param.send_flags = 0;
+		param.mw_access_flags = mem_priv;
+
+		DECR_CQCOUNT(&r_xprt->rx_ep);
+		rc = ib_bind_mw(ia->ri_id->qp,
+					seg->mr_chunk.rl_mw->r.mw, &param);
+		if (rc) {
+			dprintk("RPC:       %s: failed ib_bind_mw "
+				"%u@0x%llx status %i\n",
+				__func__, seg->mr_len,
+				(unsigned long long)seg->mr_dma, rc);
+			rpcrdma_unmap_one(ia, seg);
+		} else {
+			seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+			seg->mr_base = param.addr;
+			seg->mr_nsegs = 1;
+			nsegs = 1;
+		}
+		}
+		break;
+
+	/* Default registration each time */
+	default:
+		{
+		struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+		int len = 0;
+		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+			nsegs = RPCRDMA_MAX_DATA_SEGS;
+		for (i = 0; i < nsegs;) {
+			rpcrdma_map_one(ia, seg, writing);
+			ipb[i].addr = seg->mr_dma;
+			ipb[i].size = seg->mr_len;
+			len += seg->mr_len;
+			++seg;
+			++i;
+			/* Check for holes */
+			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+				break;
+		}
+		nsegs = i;
+		seg1->mr_base = seg1->mr_dma;
+		seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+					ipb, nsegs, mem_priv, &seg1->mr_base);
+		if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+			rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+			dprintk("RPC:       %s: failed ib_reg_phys_mr "
+				"%u@0x%llx (%d)... status %i\n",
+				__func__, len,
+				(unsigned long long)seg1->mr_dma, nsegs, rc);
+			while (nsegs--)
+				rpcrdma_unmap_one(ia, --seg);
+		} else {
+			seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+			seg1->mr_nsegs = nsegs;
+			seg1->mr_len = len;
+		}
+		}
+		break;
+	}
+	if (rc)
+		return -1;
+
+	return nsegs;
+}
+
+int
+rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
+		struct rpcrdma_xprt *r_xprt, void *r)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	int nsegs = seg->mr_nsegs, rc;
+
+	switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+	case RPCRDMA_ALLPHYSICAL:
+		BUG_ON(nsegs != 1);
+		rpcrdma_unmap_one(ia, seg);
+		rc = 0;
+		break;
+#endif
+
+	case RPCRDMA_MTHCAFMR:
+		{
+		LIST_HEAD(l);
+		list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		while (seg1->mr_nsegs--)
+			rpcrdma_unmap_one(ia, seg++);
+		}
+		if (rc)
+			dprintk("RPC:       %s: failed ib_unmap_fmr,"
+				" status %i\n", __func__, rc);
+		break;
+
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		{
+		struct ib_mw_bind param;
+		BUG_ON(nsegs != 1);
+		param.mr = ia->ri_bind_mem;
+		param.addr = 0ULL;	/* unbind */
+		param.length = 0;
+		param.mw_access_flags = 0;
+		if (r) {
+			param.wr_id = (u64) (unsigned long) r;
+			param.send_flags = IB_SEND_SIGNALED;
+			INIT_CQCOUNT(&r_xprt->rx_ep);
+		} else {
+			param.wr_id = 0ULL;
+			param.send_flags = 0;
+			DECR_CQCOUNT(&r_xprt->rx_ep);
+		}
+		rc = ib_bind_mw(ia->ri_id->qp,
+				seg->mr_chunk.rl_mw->r.mw, &param);
+		rpcrdma_unmap_one(ia, seg);
+		}
+		if (rc)
+			dprintk("RPC:       %s: failed ib_(un)bind_mw,"
+				" status %i\n", __func__, rc);
+		else
+			r = NULL;	/* will upcall on completion */
+		break;
+
+	default:
+		rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+		seg1->mr_chunk.rl_mr = NULL;
+		while (seg1->mr_nsegs--)
+			rpcrdma_unmap_one(ia, seg++);
+		if (rc)
+			dprintk("RPC:       %s: failed ib_dereg_mr,"
+				" status %i\n", __func__, rc);
+		break;
+	}
+	if (r) {
+		struct rpcrdma_rep *rep = r;
+		void (*func)(struct rpcrdma_rep *) = rep->rr_func;
+		rep->rr_func = NULL;
+		func(rep);	/* dereg done, callback now */
+	}
+	return nsegs;
+}
+
+/*
+ * Prepost any receive buffer, then post send.
+ *
+ * Receive buffer is donated to hardware, reclaimed upon recv completion.
+ */
+int
+rpcrdma_ep_post(struct rpcrdma_ia *ia,
+		struct rpcrdma_ep *ep,
+		struct rpcrdma_req *req)
+{
+	struct ib_send_wr send_wr, *send_wr_fail;
+	struct rpcrdma_rep *rep = req->rl_reply;
+	int rc;
+
+	if (rep) {
+		rc = rpcrdma_ep_post_recv(ia, ep, rep);
+		if (rc)
+			goto out;
+		req->rl_reply = NULL;
+	}
+
+	send_wr.next = NULL;
+	send_wr.wr_id = 0ULL;	/* no send cookie */
+	send_wr.sg_list = req->rl_send_iov;
+	send_wr.num_sge = req->rl_niovs;
+	send_wr.opcode = IB_WR_SEND;
+	send_wr.imm_data = 0;
+	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
+		ib_dma_sync_single_for_device(ia->ri_id->device,
+			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
+			DMA_TO_DEVICE);
+	ib_dma_sync_single_for_device(ia->ri_id->device,
+		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
+		DMA_TO_DEVICE);
+	ib_dma_sync_single_for_device(ia->ri_id->device,
+		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
+		DMA_TO_DEVICE);
+
+	if (DECR_CQCOUNT(ep) > 0)
+		send_wr.send_flags = 0;
+	else { /* Provider must take a send completion every now and then */
+		INIT_CQCOUNT(ep);
+		send_wr.send_flags = IB_SEND_SIGNALED;
+	}
+
+	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+	if (rc)
+		dprintk("RPC:       %s: ib_post_send returned %i\n", __func__,
+			rc);
+out:
+	return rc;
+}
+
+/*
+ * (Re)post a receive buffer.
+ */
+int
+rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
+		     struct rpcrdma_ep *ep,
+		     struct rpcrdma_rep *rep)
+{
+	struct ib_recv_wr recv_wr, *recv_wr_fail;
+	int rc;
+
+	recv_wr.next = NULL;
+	recv_wr.wr_id = (u64) (unsigned long) rep;
+	recv_wr.sg_list = &rep->rr_iov;
+	recv_wr.num_sge = 1;
+
+	ib_dma_sync_single_for_cpu(ia->ri_id->device,
+		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
+
+	DECR_CQCOUNT(ep);
+	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
+
+	if (rc)
+		dprintk("RPC:       %s: ib_post_recv returned %i\n", __func__,
+			rc);
+	return rc;
+}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
new file mode 100644
index 0000000..2427822
--- /dev/null
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
+#define _LINUX_SUNRPC_XPRT_RDMA_H
+
+#include <linux/wait.h> 		/* wait_queue_head_t, etc */
+#include <linux/spinlock.h> 		/* spinlock_t, etc */
+#include <asm/atomic.h>			/* atomic_t, etc */
+
+#include <rdma/rdma_cm.h>		/* RDMA connection api */
+#include <rdma/ib_verbs.h>		/* RDMA verbs api */
+
+#include <linux/sunrpc/clnt.h> 		/* rpc_xprt */
+#include <linux/sunrpc/rpc_rdma.h> 	/* RPC/RDMA protocol */
+#include <linux/sunrpc/xprtrdma.h> 	/* xprt parameters */
+
+/*
+ * Interface Adapter -- one per transport instance
+ */
+struct rpcrdma_ia {
+	struct rdma_cm_id 	*ri_id;
+	struct ib_pd		*ri_pd;
+	struct ib_mr		*ri_bind_mem;
+	struct completion	ri_done;
+	int			ri_async_rc;
+	enum rpcrdma_memreg	ri_memreg_strategy;
+};
+
+/*
+ * RDMA Endpoint -- one per transport instance
+ */
+
+struct rpcrdma_ep {
+	atomic_t		rep_cqcount;
+	int			rep_cqinit;
+	int			rep_connected;
+	struct rpcrdma_ia	*rep_ia;
+	struct ib_cq		*rep_cq;
+	struct ib_qp_init_attr	rep_attr;
+	wait_queue_head_t 	rep_connect_wait;
+	struct ib_sge		rep_pad;	/* holds zeroed pad */
+	struct ib_mr		*rep_pad_mr;	/* holds zeroed pad */
+	void			(*rep_func)(struct rpcrdma_ep *);
+	struct rpc_xprt		*rep_xprt;	/* for rep_func */
+	struct rdma_conn_param	rep_remote_cma;
+	struct sockaddr_storage	rep_remote_addr;
+};
+
+#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
+#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+
+/*
+ * struct rpcrdma_rep -- this structure encapsulates state required to recv
+ * and complete a reply, asychronously. It needs several pieces of
+ * state:
+ *   o recv buffer (posted to provider)
+ *   o ib_sge (also donated to provider)
+ *   o status of reply (length, success or not)
+ *   o bookkeeping state to get run by tasklet (list, etc)
+ *
+ * These are allocated during initialization, per-transport instance;
+ * however, the tasklet execution list itself is global, as it should
+ * always be pretty short.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ */
+
+/* temporary static scatter/gather max */
+#define RPCRDMA_MAX_DATA_SEGS	(8)	/* max scatter/gather */
+#define RPCRDMA_MAX_SEGS 	(RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
+#define MAX_RPCRDMAHDR	(\
+	/* max supported RPC/RDMA header */ \
+	sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
+	(sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
+
+struct rpcrdma_buffer;
+
+struct rpcrdma_rep {
+	unsigned int	rr_len;		/* actual received reply length */
+	struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
+	struct rpc_xprt	*rr_xprt;	/* needed for request/reply matching */
+	void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
+	struct list_head rr_list;	/* tasklet list */
+	wait_queue_head_t rr_unbind;	/* optional unbind wait */
+	struct ib_sge	rr_iov;		/* for posting */
+	struct ib_mr	*rr_handle;	/* handle for mem in rr_iov */
+	char	rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
+};
+
+/*
+ * struct rpcrdma_req -- structure central to the request/reply sequence.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ *
+ * It includes pre-registered buffer memory for send AND recv.
+ * The recv buffer, however, is not owned by this structure, and
+ * is "donated" to the hardware when a recv is posted. When a
+ * reply is handled, the recv buffer used is given back to the
+ * struct rpcrdma_req associated with the request.
+ *
+ * In addition to the basic memory, this structure includes an array
+ * of iovs for send operations. The reason is that the iovs passed to
+ * ib_post_{send,recv} must not be modified until the work request
+ * completes.
+ *
+ * NOTES:
+ *   o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we
+ *     marshal. The number needed varies depending on the iov lists that
+ *     are passed to us, the memory registration mode we are in, and if
+ *     physical addressing is used, the layout.
+ */
+
+struct rpcrdma_mr_seg {		/* chunk descriptors */
+	union {				/* chunk memory handles */
+		struct ib_mr	*rl_mr;		/* if registered directly */
+		struct rpcrdma_mw {		/* if registered from region */
+			union {
+				struct ib_mw	*mw;
+				struct ib_fmr	*fmr;
+			} r;
+			struct list_head mw_list;
+		} *rl_mw;
+	} mr_chunk;
+	u64		mr_base;	/* registration result */
+	u32		mr_rkey;	/* registration result */
+	u32		mr_len;		/* length of chunk or segment */
+	int		mr_nsegs;	/* number of segments in chunk or 0 */
+	enum dma_data_direction	mr_dir;	/* segment mapping direction */
+	dma_addr_t	mr_dma;		/* segment mapping address */
+	size_t		mr_dmalen;	/* segment mapping length */
+	struct page	*mr_page;	/* owning page, if any */
+	char		*mr_offset;	/* kva if no page, else offset */
+};
+
+struct rpcrdma_req {
+	size_t 		rl_size;	/* actual length of buffer */
+	unsigned int	rl_niovs;	/* 0, 2 or 4 */
+	unsigned int	rl_nchunks;	/* non-zero if chunks */
+	struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
+	struct rpcrdma_rep	*rl_reply;/* holder for reply buffer */
+	struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
+	struct ib_sge	rl_send_iov[4];	/* for active requests */
+	struct ib_sge	rl_iov;		/* for posting */
+	struct ib_mr	*rl_handle;	/* handle for mem in rl_iov */
+	char		rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
+	__u32 		rl_xdr_buf[0];	/* start of returned rpc rq_buffer */
+};
+#define rpcr_to_rdmar(r) \
+	container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
+
+/*
+ * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
+ * inline requests/replies, and client/server credits.
+ *
+ * One of these is associated with a transport instance
+ */
+struct rpcrdma_buffer {
+	spinlock_t	rb_lock;	/* protects indexes */
+	atomic_t	rb_credits;	/* most recent server credits */
+	unsigned long	rb_cwndscale;	/* cached framework rpc_cwndscale */
+	int		rb_max_requests;/* client max requests */
+	struct list_head rb_mws;	/* optional memory windows/fmrs */
+	int		rb_send_index;
+	struct rpcrdma_req	**rb_send_bufs;
+	int		rb_recv_index;
+	struct rpcrdma_rep	**rb_recv_bufs;
+	char		*rb_pool;
+};
+#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
+
+/*
+ * Internal structure for transport instance creation. This
+ * exists primarily for modularity.
+ *
+ * This data should be set with mount options
+ */
+struct rpcrdma_create_data_internal {
+	struct sockaddr_storage	addr;	/* RDMA server address */
+	unsigned int	max_requests;	/* max requests (slots) in flight */
+	unsigned int	rsize;		/* mount rsize - max read hdr+data */
+	unsigned int	wsize;		/* mount wsize - max write hdr+data */
+	unsigned int	inline_rsize;	/* max non-rdma read data payload */
+	unsigned int	inline_wsize;	/* max non-rdma write data payload */
+	unsigned int	padding;	/* non-rdma write header padding */
+};
+
+#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
+	(rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize)
+
+#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
+	(rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize)
+
+#define RPCRDMA_INLINE_PAD_VALUE(rq)\
+	rpcx_to_rdmad(rq->rq_task->tk_xprt).padding
+
+/*
+ * Statistics for RPCRDMA
+ */
+struct rpcrdma_stats {
+	unsigned long		read_chunk_count;
+	unsigned long		write_chunk_count;
+	unsigned long		reply_chunk_count;
+
+	unsigned long long	total_rdma_request;
+	unsigned long long	total_rdma_reply;
+
+	unsigned long long	pullup_copy_count;
+	unsigned long long	fixup_copy_count;
+	unsigned long		hardway_register_count;
+	unsigned long		failed_marshal_count;
+	unsigned long		bad_reply_count;
+};
+
+/*
+ * RPCRDMA transport -- encapsulates the structures above for
+ * integration with RPC.
+ *
+ * The contained structures are embedded, not pointers,
+ * for convenience. This structure need not be visible externally.
+ *
+ * It is allocated and initialized during mount, and released
+ * during unmount.
+ */
+struct rpcrdma_xprt {
+	struct rpc_xprt		xprt;
+	struct rpcrdma_ia	rx_ia;
+	struct rpcrdma_ep	rx_ep;
+	struct rpcrdma_buffer	rx_buf;
+	struct rpcrdma_create_data_internal rx_data;
+	struct delayed_work	rdma_connect;
+	struct rpcrdma_stats	rx_stats;
+};
+
+#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
+#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+
+/*
+ * Interface Adapter calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+void rpcrdma_ia_close(struct rpcrdma_ia *);
+
+/*
+ * Endpoint calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
+				struct rpcrdma_create_data_internal *);
+int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+
+int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
+				struct rpcrdma_req *);
+int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
+				struct rpcrdma_rep *);
+
+/*
+ * Buffer calls - xprtrdma/verbs.c
+ */
+int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
+				struct rpcrdma_ia *,
+				struct rpcrdma_create_data_internal *);
+void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+
+struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
+void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+
+int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
+				struct ib_mr **, struct ib_sge *);
+int rpcrdma_deregister_internal(struct rpcrdma_ia *,
+				struct ib_mr *, struct ib_sge *);
+
+int rpcrdma_register_external(struct rpcrdma_mr_seg *,
+				int, int, struct rpcrdma_xprt *);
+int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
+				struct rpcrdma_xprt *, void *);
+
+/*
+ * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
+ */
+void rpcrdma_conn_func(struct rpcrdma_ep *);
+void rpcrdma_reply_handler(struct rpcrdma_rep *);
+
+/*
+ * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
+ */
+int rpcrdma_marshal_req(struct rpc_rqst *);
+
+#endif				/* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 282efd4..02298f5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -13,10 +13,14 @@
  *  (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
  *
  * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
+ *   <gilles.quillard@bull.net>
  */
 
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/capability.h>
 #include <linux/pagemap.h>
 #include <linux/errno.h>
@@ -28,6 +32,7 @@
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
 
 #include <net/sock.h>
@@ -260,14 +265,29 @@
 #define TCP_RCV_COPY_XID	(1UL << 2)
 #define TCP_RCV_COPY_DATA	(1UL << 3)
 
-static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
 {
-	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+	return (struct sockaddr *) &xprt->addr;
+}
+
+static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
+{
+	return (struct sockaddr_in *) &xprt->addr;
+}
+
+static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
+{
+	return (struct sockaddr_in6 *) &xprt->addr;
+}
+
+static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in *addr = xs_addr_in(xprt);
 	char *buf;
 
 	buf = kzalloc(20, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 20, "%u.%u.%u.%u",
+		snprintf(buf, 20, NIPQUAD_FMT,
 				NIPQUAD(addr->sin_addr.s_addr));
 	}
 	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
@@ -279,26 +299,123 @@
 	}
 	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
 
-	if (xprt->prot == IPPROTO_UDP)
-		xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
-	else
-		xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		if (xprt->prot == IPPROTO_UDP)
+			snprintf(buf, 8, "udp");
+		else
+			snprintf(buf, 8, "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
 
 	buf = kzalloc(48, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+		snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
 			NIPQUAD(addr->sin_addr.s_addr),
 			ntohs(addr->sin_port),
 			xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
 	}
 	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+	buf = kzalloc(10, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 10, "%02x%02x%02x%02x",
+				NIPQUAD(addr->sin_addr.s_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%4hx",
+				ntohs(addr->sin_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+	buf = kzalloc(30, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+				NIPQUAD(addr->sin_addr.s_addr),
+				ntohs(addr->sin_port) >> 8,
+				ntohs(addr->sin_port) & 0xff);
+	}
+	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+	xprt->address_strings[RPC_DISPLAY_NETID] =
+		kstrdup(xprt->prot == IPPROTO_UDP ?
+			RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
+}
+
+static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in6 *addr = xs_addr_in6(xprt);
+	char *buf;
+
+	buf = kzalloc(40, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 40, NIP6_FMT,
+				NIP6(addr->sin6_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%u",
+				ntohs(addr->sin6_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		if (xprt->prot == IPPROTO_UDP)
+			snprintf(buf, 8, "udp");
+		else
+			snprintf(buf, 8, "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
+
+	buf = kzalloc(64, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
+				NIP6(addr->sin6_addr),
+				ntohs(addr->sin6_port),
+				xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+	buf = kzalloc(36, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 36, NIP6_SEQFMT,
+				NIP6(addr->sin6_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%4hx",
+				ntohs(addr->sin6_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+	buf = kzalloc(50, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 50, NIP6_FMT".%u.%u",
+				NIP6(addr->sin6_addr),
+				ntohs(addr->sin6_port) >> 8,
+				ntohs(addr->sin6_port) & 0xff);
+	}
+	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+	xprt->address_strings[RPC_DISPLAY_NETID] =
+		kstrdup(xprt->prot == IPPROTO_UDP ?
+			RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
 }
 
 static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 {
-	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
-	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
-	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+	int i;
+
+	for (i = 0; i < RPC_DISPLAY_MAX; i++)
+		kfree(xprt->address_strings[i]);
 }
 
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
@@ -463,19 +580,20 @@
 
 	req->rq_xtime = jiffies;
 	status = xs_sendpages(transport->sock,
-			      (struct sockaddr *) &xprt->addr,
+			      xs_addr(xprt),
 			      xprt->addrlen, xdr,
 			      req->rq_bytes_sent);
 
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
 			xdr->len - req->rq_bytes_sent, status);
 
-	if (likely(status >= (int) req->rq_slen))
-		return 0;
-
-	/* Still some bytes left; set up for a retry later. */
-	if (status > 0)
+	if (status >= 0) {
+		task->tk_bytes_sent += status;
+		if (status >= req->rq_slen)
+			return 0;
+		/* Still some bytes left; set up for a retry later. */
 		status = -EAGAIN;
+	}
 
 	switch (status) {
 	case -ENETUNREACH:
@@ -523,7 +641,8 @@
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
-	int status, retry = 0;
+	int status;
+	unsigned int retry = 0;
 
 	xs_encode_tcp_record_marker(&req->rq_snd_buf);
 
@@ -661,6 +780,7 @@
 	xs_free_peer_addresses(xprt);
 	kfree(xprt->slot);
 	kfree(xprt);
+	module_put(THIS_MODULE);
 }
 
 static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -1139,14 +1259,23 @@
  */
 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 {
-	struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+	struct sockaddr *addr = xs_addr(xprt);
 
 	dprintk("RPC:       setting port for xprt %p to %u\n", xprt, port);
 
-	sap->sin_port = htons(port);
+	switch (addr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *)addr)->sin_port = htons(port);
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+		break;
+	default:
+		BUG();
+	}
 }
 
-static int xs_bind(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
 {
 	struct sockaddr_in myaddr = {
 		.sin_family = AF_INET,
@@ -1174,8 +1303,42 @@
 		else
 			port--;
 	} while (err == -EADDRINUSE && port != transport->port);
-	dprintk("RPC:       xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
-		NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
+	dprintk("RPC:       %s "NIPQUAD_FMT":%u: %s (%d)\n",
+			__FUNCTION__, NIPQUAD(myaddr.sin_addr),
+			port, err ? "failed" : "ok", err);
+	return err;
+}
+
+static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
+{
+	struct sockaddr_in6 myaddr = {
+		.sin6_family = AF_INET6,
+	};
+	struct sockaddr_in6 *sa;
+	int err;
+	unsigned short port = transport->port;
+
+	if (!transport->xprt.resvport)
+		port = 0;
+	sa = (struct sockaddr_in6 *)&transport->addr;
+	myaddr.sin6_addr = sa->sin6_addr;
+	do {
+		myaddr.sin6_port = htons(port);
+		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
+						sizeof(myaddr));
+		if (!transport->xprt.resvport)
+			break;
+		if (err == 0) {
+			transport->port = port;
+			break;
+		}
+		if (port <= xprt_min_resvport)
+			port = xprt_max_resvport;
+		else
+			port--;
+	} while (err == -EADDRINUSE && port != transport->port);
+	dprintk("RPC:       xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
+		NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
 	return err;
 }
 
@@ -1183,64 +1346,36 @@
 static struct lock_class_key xs_key[2];
 static struct lock_class_key xs_slock_key[2];
 
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
+
 	BUG_ON(sock_owned_by_user(sk));
-	switch (sk->sk_family) {
-	case AF_INET:
-		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
-			&xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
-		break;
+	sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
+		&xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
+}
 
-	case AF_INET6:
-		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
-			&xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
-		break;
+static inline void xs_reclassify_socket6(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
 
-	default:
-		BUG();
-	}
+	BUG_ON(sock_owned_by_user(sk));
+	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
+		&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
 }
 #else
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
+{
+}
+
+static inline void xs_reclassify_socket6(struct socket *sock)
 {
 }
 #endif
 
-/**
- * xs_udp_connect_worker - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker(struct work_struct *work)
+static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-	struct socket *sock = transport->sock;
-	int err, status = -EIO;
-
-	if (xprt->shutdown || !xprt_bound(xprt))
-		goto out;
-
-	/* Start by resetting any existing state */
-	xs_close(xprt);
-
-	if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
-		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
-		goto out;
-	}
-	xs_reclassify_socket(sock);
-
-	if (xs_bind(transport, sock)) {
-		sock_release(sock);
-		goto out;
-	}
-
-	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
 	if (!transport->inet) {
 		struct sock *sk = sock->sk;
@@ -1265,6 +1400,84 @@
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 	xs_udp_do_set_buffer_size(xprt);
+}
+
+/**
+ * xs_udp_connect_worker4 - set up a UDP socket
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_udp_connect_worker4(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock = transport->sock;
+	int err, status = -EIO;
+
+	if (xprt->shutdown || !xprt_bound(xprt))
+		goto out;
+
+	/* Start by resetting any existing state */
+	xs_close(xprt);
+
+	if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
+		goto out;
+	}
+	xs_reclassify_socket4(sock);
+
+	if (xs_bind4(transport, sock)) {
+		sock_release(sock);
+		goto out;
+	}
+
+	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+	xs_udp_finish_connecting(xprt, sock);
+	status = 0;
+out:
+	xprt_wake_pending_tasks(xprt, status);
+	xprt_clear_connecting(xprt);
+}
+
+/**
+ * xs_udp_connect_worker6 - set up a UDP socket
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_udp_connect_worker6(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock = transport->sock;
+	int err, status = -EIO;
+
+	if (xprt->shutdown || !xprt_bound(xprt))
+		goto out;
+
+	/* Start by resetting any existing state */
+	xs_close(xprt);
+
+	if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
+		goto out;
+	}
+	xs_reclassify_socket6(sock);
+
+	if (xs_bind6(transport, sock) < 0) {
+		sock_release(sock);
+		goto out;
+	}
+
+	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+	xs_udp_finish_connecting(xprt, sock);
 	status = 0;
 out:
 	xprt_wake_pending_tasks(xprt, status);
@@ -1295,42 +1508,9 @@
 				result);
 }
 
-/**
- * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker(struct work_struct *work)
+static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-	struct socket *sock = transport->sock;
-	int err, status = -EIO;
-
-	if (xprt->shutdown || !xprt_bound(xprt))
-		goto out;
-
-	if (!sock) {
-		/* start from scratch */
-		if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
-			dprintk("RPC:       can't create TCP transport "
-					"socket (%d).\n", -err);
-			goto out;
-		}
-		xs_reclassify_socket(sock);
-
-		if (xs_bind(transport, sock)) {
-			sock_release(sock);
-			goto out;
-		}
-	} else
-		/* "close" the socket, preserving the local port */
-		xs_tcp_reuse_connection(xprt);
-
-	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
 	if (!transport->inet) {
 		struct sock *sk = sock->sk;
@@ -1364,8 +1544,46 @@
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
-	status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
-			xprt->addrlen, O_NONBLOCK);
+	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+}
+
+/**
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker4(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock = transport->sock;
+	int err, status = -EIO;
+
+	if (xprt->shutdown || !xprt_bound(xprt))
+		goto out;
+
+	if (!sock) {
+		/* start from scratch */
+		if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+			dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
+			goto out;
+		}
+		xs_reclassify_socket4(sock);
+
+		if (xs_bind4(transport, sock) < 0) {
+			sock_release(sock);
+			goto out;
+		}
+	} else
+		/* "close" the socket, preserving the local port */
+		xs_tcp_reuse_connection(xprt);
+
+	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+	status = xs_tcp_finish_connecting(xprt, sock);
 	dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt),
 			sock->sk->sk_state);
@@ -1391,6 +1609,66 @@
 }
 
 /**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock = transport->sock;
+	int err, status = -EIO;
+
+	if (xprt->shutdown || !xprt_bound(xprt))
+		goto out;
+
+	if (!sock) {
+		/* start from scratch */
+		if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+			dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
+			goto out;
+		}
+		xs_reclassify_socket6(sock);
+
+		if (xs_bind6(transport, sock) < 0) {
+			sock_release(sock);
+			goto out;
+		}
+	} else
+		/* "close" the socket, preserving the local port */
+		xs_tcp_reuse_connection(xprt);
+
+	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+	status = xs_tcp_finish_connecting(xprt, sock);
+	dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
+			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
+	if (status < 0) {
+		switch (status) {
+			case -EINPROGRESS:
+			case -EALREADY:
+				goto out_clear;
+			case -ECONNREFUSED:
+			case -ECONNRESET:
+				/* retry with existing socket, after a delay */
+				break;
+			default:
+				/* get rid of existing socket, and retry */
+				xs_close(xprt);
+				break;
+		}
+	}
+out:
+	xprt_wake_pending_tasks(xprt, status);
+out_clear:
+	xprt_clear_connecting(xprt);
+}
+
+/**
  * xs_connect - connect a socket to a remote endpoint
  * @task: address of RPC task that manages state of connect request
  *
@@ -1508,7 +1786,8 @@
 	.print_stats		= xs_tcp_print_stats,
 };
 
-static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
+static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
+				      unsigned int slot_table_size)
 {
 	struct rpc_xprt *xprt;
 	struct sock_xprt *new;
@@ -1549,8 +1828,9 @@
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 {
+	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
 
@@ -1559,15 +1839,11 @@
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
-	if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
-		xprt_set_bound(xprt);
-
 	xprt->prot = IPPROTO_UDP;
 	xprt->tsh_size = 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
-	INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_UDP_CONN_TO;
 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1580,11 +1856,37 @@
 	else
 		xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
 
-	xs_format_peer_addresses(xprt);
+	switch (addr->sa_family) {
+	case AF_INET:
+		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker,
+					xs_udp_connect_worker4);
+		xs_format_ipv4_peer_addresses(xprt);
+		break;
+	case AF_INET6:
+		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker,
+					xs_udp_connect_worker6);
+		xs_format_ipv6_peer_addresses(xprt);
+		break;
+	default:
+		kfree(xprt);
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
 	dprintk("RPC:       set up transport to address %s\n",
 			xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	return xprt;
+	if (try_module_get(THIS_MODULE))
+		return xprt;
+
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(-EINVAL);
 }
 
 /**
@@ -1592,8 +1894,9 @@
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 {
+	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
 
@@ -1602,14 +1905,10 @@
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
-	if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
-		xprt_set_bound(xprt);
-
 	xprt->prot = IPPROTO_TCP;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
-	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_TCP_CONN_TO;
 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
@@ -1622,15 +1921,55 @@
 	else
 		xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
 
-	xs_format_peer_addresses(xprt);
+	switch (addr->sa_family) {
+	case AF_INET:
+		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
+		xs_format_ipv4_peer_addresses(xprt);
+		break;
+	case AF_INET6:
+		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
+		xs_format_ipv6_peer_addresses(xprt);
+		break;
+	default:
+		kfree(xprt);
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
 	dprintk("RPC:       set up transport to address %s\n",
 			xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	return xprt;
+	if (try_module_get(THIS_MODULE))
+		return xprt;
+
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(-EINVAL);
 }
 
+static struct xprt_class	xs_udp_transport = {
+	.list		= LIST_HEAD_INIT(xs_udp_transport.list),
+	.name		= "udp",
+	.owner		= THIS_MODULE,
+	.ident		= IPPROTO_UDP,
+	.setup		= xs_setup_udp,
+};
+
+static struct xprt_class	xs_tcp_transport = {
+	.list		= LIST_HEAD_INIT(xs_tcp_transport.list),
+	.name		= "tcp",
+	.owner		= THIS_MODULE,
+	.ident		= IPPROTO_TCP,
+	.setup		= xs_setup_tcp,
+};
+
 /**
- * init_socket_xprt - set up xprtsock's sysctls
+ * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
  *
  */
 int init_socket_xprt(void)
@@ -1640,11 +1979,14 @@
 		sunrpc_table_header = register_sysctl_table(sunrpc_table);
 #endif
 
+	xprt_register_transport(&xs_udp_transport);
+	xprt_register_transport(&xs_tcp_transport);
+
 	return 0;
 }
 
 /**
- * cleanup_socket_xprt - remove xprtsock's sysctls
+ * cleanup_socket_xprt - remove xprtsock's sysctls, unregister
  *
  */
 void cleanup_socket_xprt(void)
@@ -1655,4 +1997,7 @@
 		sunrpc_table_header = NULL;
 	}
 #endif
+
+	xprt_unregister_transport(&xs_udp_transport);
+	xprt_unregister_transport(&xs_tcp_transport);
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3c3fff3..cf76150 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3932,7 +3932,7 @@
 }
 
 static unsigned int selinux_ip_postroute_last(unsigned int hooknum,
-                                              struct sk_buff **pskb,
+                                              struct sk_buff *skb,
                                               const struct net_device *in,
                                               const struct net_device *out,
                                               int (*okfn)(struct sk_buff *),
@@ -3941,7 +3941,6 @@
 	char *addrp;
 	int len, err = 0;
 	struct sock *sk;
-	struct sk_buff *skb = *pskb;
 	struct avc_audit_data ad;
 	struct net_device *dev = (struct net_device *)out;
 	struct sk_security_struct *sksec;
@@ -3977,23 +3976,23 @@
 }
 
 static unsigned int selinux_ipv4_postroute_last(unsigned int hooknum,
-						struct sk_buff **pskb,
+						struct sk_buff *skb,
 						const struct net_device *in,
 						const struct net_device *out,
 						int (*okfn)(struct sk_buff *))
 {
-	return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET);
+	return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 
 static unsigned int selinux_ipv6_postroute_last(unsigned int hooknum,
-						struct sk_buff **pskb,
+						struct sk_buff *skb,
 						const struct net_device *in,
 						const struct net_device *out,
 						int (*okfn)(struct sk_buff *))
 {
-	return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6);
+	return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET6);
 }
 
 #endif	/* IPV6 */