Merge branch 'release' of ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6
* 'release' of ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6:
[IA64] build fix for scatterlist
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 63df226..fb8258e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -205,20 +205,6 @@
---------------------------
-What: Compaq touchscreen device emulation
-When: Oct 2007
-Files: drivers/input/tsdev.c
-Why: The code says it was obsolete when it was written in 2001.
- tslib is a userspace library which does anything tsdev can do and
- much more besides in userspace where this code belongs. There is no
- longer any need for tsdev and applications should have converted to
- use tslib by now.
- The name "tsdev" is also extremely confusing and lots of people have
- it loaded when they don't need/use it.
-Who: Richard Purdie <rpurdie@rpsys.net>
-
----------------------------
-
What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
When: September 2007
Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c323778..085e4a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1083,6 +1083,13 @@
[NFS] set the maximum lifetime for idmapper cache
entries.
+ nfs.enable_ino64=
+ [NFS] enable 64-bit inode numbers.
+ If zero, the NFS client will fake up a 32-bit inode
+ number for the readdir() and stat() syscalls instead
+ of returning the full 64-bit number.
+ The default is to return 64-bit inode numbers.
+
nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels
no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
@@ -1883,9 +1890,6 @@
Format:
<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
- tsdev.xres= [TS] Horizontal screen resolution.
- tsdev.yres= [TS] Vertical screen resolution.
-
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
Format:
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 1da5666..1134062 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -281,6 +281,39 @@
will be rounded down to the nearest multiple. The default
value is 0.
+fail_over_mac
+
+ Specifies whether active-backup mode should set all slaves to
+ the same MAC address (the traditional behavior), or, when
+ enabled, change the bond's MAC address when changing the
+ active interface (i.e., fail over the MAC address itself).
+
+ Fail over MAC is useful for devices that cannot ever alter
+ their MAC address, or for devices that refuse incoming
+ broadcasts with their own source MAC (which interferes with
+ the ARP monitor).
+
+ The down side of fail over MAC is that every device on the
+ network must be updated via gratuitous ARP, vs. just updating
+ a switch or set of switches (which often takes place for any
+ traffic, not just ARP traffic, if the switch snoops incoming
+ traffic to update its tables) for the traditional method. If
+ the gratuitous ARP is lost, communication may be disrupted.
+
+ When fail over MAC is used in conjuction with the mii monitor,
+ devices which assert link up prior to being able to actually
+ transmit and receive are particularly susecptible to loss of
+ the gratuitous ARP, and an appropriate updelay setting may be
+ required.
+
+ A value of 0 disables fail over MAC, and is the default. A
+ value of 1 enables fail over MAC. This option is enabled
+ automatically if the first slave added cannot change its MAC
+ address. This option may be modified via sysfs only when no
+ slaves are present in the bond.
+
+ This option was added in bonding version 3.2.0.
+
lacp_rate
Option specifying the rate in which we'll ask our link partner
diff --git a/Documentation/networking/proc_net_tcp.txt b/Documentation/networking/proc_net_tcp.txt
index 5e21f7c..4a79209 100644
--- a/Documentation/networking/proc_net_tcp.txt
+++ b/Documentation/networking/proc_net_tcp.txt
@@ -1,8 +1,9 @@
This document describes the interfaces /proc/net/tcp and /proc/net/tcp6.
+Note that these interfaces are deprecated in favor of tcp_diag.
These /proc interfaces provide information about currently active TCP
-connections, and are implemented by tcp_get_info() in net/ipv4/tcp_ipv4.c and
-tcp6_get_info() in net/ipv6/tcp_ipv6.c, respectively.
+connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c
+and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively.
It will first list all listening TCP sockets, and next list all established
TCP connections. A typical entry of /proc/net/tcp would look like this (split
diff --git a/MAINTAINERS b/MAINTAINERS
index 12cee3d..c7355e7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2404,6 +2404,15 @@
L: lm-sensors@lm-sensors.org
S: Maintained
+LOCKDEP AND LOCKSTAT
+P: Peter Zijlstra
+M: peterz@infradead.org
+P: Ingo Molnar
+M: mingo@redhat.com
+L: linux-kernel@vger.kernel.org
+T: git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git
+S: Maintained
+
LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks)
P: Richard Russon (FlatCap)
M: ldm@flatcap.org
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 2c47db4..046e6d8 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -88,7 +88,7 @@
#endif
#if defined(CONFIG_KEYBOARD_BFIN) || defined(CONFIG_KEYBOARD_BFIN_MODULE)
-static int bf548_keymap[] = {
+static const unsigned int bf548_keymap[] = {
KEYVAL(0, 0, KEY_ENTER),
KEYVAL(0, 1, KEY_HELP),
KEYVAL(0, 2, KEY_0),
@@ -110,8 +110,8 @@
static struct bfin_kpad_platform_data bf54x_kpad_data = {
.rows = 4,
.cols = 4,
- .keymap = bf548_keymap,
- .keymapsize = ARRAY_SIZE(bf548_keymap),
+ .keymap = bf548_keymap,
+ .keymapsize = ARRAY_SIZE(bf548_keymap),
.repeat = 0,
.debounce_time = 5000, /* ns (5ms) */
.coldrive_time = 1000, /* ns (1ms) */
diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index fbbccb5..880add1 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -1,6 +1,4 @@
/*
- * linux/arch/m68k/atari/atakeyb.c
- *
* Atari Keyboard driver for 680x0 Linux
*
* This file is subject to the terms and conditions of the GNU General Public
diff --git a/arch/mips/au1000/common/prom.c b/arch/mips/au1000/common/prom.c
index a8637cd..90d7069 100644
--- a/arch/mips/au1000/common/prom.c
+++ b/arch/mips/au1000/common/prom.c
@@ -33,7 +33,6 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -41,18 +40,16 @@
#include <asm/bootinfo.h>
-/* #define DEBUG_CMDLINE */
-
-extern int prom_argc;
-extern char **prom_argv, **prom_envp;
-
+int prom_argc;
+char **prom_argv;
+char **prom_envp;
char * __init_or_module prom_getcmdline(void)
{
return &(arcs_cmdline[0]);
}
-void prom_init_cmdline(void)
+void prom_init_cmdline(void)
{
char *cp;
int actr;
@@ -61,7 +58,7 @@
cp = &(arcs_cmdline[0]);
while(actr < prom_argc) {
- strcpy(cp, prom_argv[actr]);
+ strcpy(cp, prom_argv[actr]);
cp += strlen(prom_argv[actr]);
*cp++ = ' ';
actr++;
@@ -70,10 +67,8 @@
--cp;
if (prom_argc > 1)
*cp = '\0';
-
}
-
char *prom_getenv(char *envname)
{
/*
@@ -95,21 +90,23 @@
}
env++;
}
+
return NULL;
}
-inline unsigned char str2hexnum(unsigned char c)
+static inline unsigned char str2hexnum(unsigned char c)
{
- if(c >= '0' && c <= '9')
+ if (c >= '0' && c <= '9')
return c - '0';
- if(c >= 'a' && c <= 'f')
+ if (c >= 'a' && c <= 'f')
return c - 'a' + 10;
- if(c >= 'A' && c <= 'F')
+ if (c >= 'A' && c <= 'F')
return c - 'A' + 10;
+
return 0; /* foo */
}
-inline void str2eaddr(unsigned char *ea, unsigned char *str)
+static inline void str2eaddr(unsigned char *ea, unsigned char *str)
{
int i;
@@ -124,35 +121,29 @@
}
}
-int get_ethernet_addr(char *ethernet_addr)
+int prom_get_ethernet_addr(char *ethernet_addr)
{
- char *ethaddr_str;
+ char *ethaddr_str;
+ char *argptr;
- ethaddr_str = prom_getenv("ethaddr");
+ /* Check the environment variables first */
+ ethaddr_str = prom_getenv("ethaddr");
if (!ethaddr_str) {
- printk("ethaddr not set in boot prom\n");
- return -1;
+ /* Check command line */
+ argptr = prom_getcmdline();
+ ethaddr_str = strstr(argptr, "ethaddr=");
+ if (!ethaddr_str)
+ return -1;
+
+ ethaddr_str += strlen("ethaddr=");
}
+
str2eaddr(ethernet_addr, ethaddr_str);
-#if 0
- {
- int i;
-
- printk("get_ethernet_addr: ");
- for (i=0; i<5; i++)
- printk("%02x:", (unsigned char)*(ethernet_addr+i));
- printk("%02x\n", *(ethernet_addr+i));
- }
-#endif
-
return 0;
}
+EXPORT_SYMBOL(prom_get_ethernet_addr);
void __init prom_free_prom_memory(void)
{
}
-
-EXPORT_SYMBOL(prom_getcmdline);
-EXPORT_SYMBOL(get_ethernet_addr);
-EXPORT_SYMBOL(str2eaddr);
diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c
index b212c07..a90d425 100644
--- a/arch/mips/au1000/common/setup.c
+++ b/arch/mips/au1000/common/setup.c
@@ -40,10 +40,11 @@
#include <asm/mipsregs.h>
#include <asm/reboot.h>
#include <asm/pgtable.h>
-#include <asm/mach-au1x00/au1000.h>
#include <asm/time.h>
-extern char * prom_getcmdline(void);
+#include <au1000.h>
+#include <prom.h>
+
extern void __init board_setup(void);
extern void au1000_restart(char *);
extern void au1000_halt(void);
diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c
index 4d7bcfc..43298fd 100644
--- a/arch/mips/au1000/db1x00/init.c
+++ b/arch/mips/au1000/db1x00/init.c
@@ -31,15 +31,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c
index 2aa7b2e..cdeae32 100644
--- a/arch/mips/au1000/mtx-1/init.c
+++ b/arch/mips/au1000/mtx-1/init.c
@@ -34,13 +34,11 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
+
#include <asm/addrspace.h>
#include <asm/bootinfo.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c
index 4535f72..ddccaf6 100644
--- a/arch/mips/au1000/pb1000/init.c
+++ b/arch/mips/au1000/pb1000/init.c
@@ -30,15 +30,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c
index 7ba6852..c93fd39 100644
--- a/arch/mips/au1000/pb1100/init.c
+++ b/arch/mips/au1000/pb1100/init.c
@@ -31,15 +31,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/pb1200/board_setup.c b/arch/mips/au1000/pb1200/board_setup.c
index 2122515..5dbc986 100644
--- a/arch/mips/au1000/pb1200/board_setup.c
+++ b/arch/mips/au1000/pb1200/board_setup.c
@@ -41,8 +41,10 @@
#include <asm/mipsregs.h>
#include <asm/reboot.h>
#include <asm/pgtable.h>
-#include <asm/mach-au1x00/au1000.h>
-#include <asm/mach-au1x00/au1xxx_dbdma.h>
+
+#include <au1000.h>
+#include <au1xxx_dbdma.h>
+#include <prom.h>
#ifdef CONFIG_MIPS_PB1200
#include <asm/mach-pb1x00/pb1200.h>
diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c
index 5a70029..c251570 100644
--- a/arch/mips/au1000/pb1200/init.c
+++ b/arch/mips/au1000/pb1200/init.c
@@ -31,15 +31,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c
index e58a9d6..507d4b2 100644
--- a/arch/mips/au1000/pb1500/init.c
+++ b/arch/mips/au1000/pb1500/init.c
@@ -31,15 +31,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c
index fad53bf..b03eee6 100644
--- a/arch/mips/au1000/pb1550/init.c
+++ b/arch/mips/au1000/pb1550/init.c
@@ -31,15 +31,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c
index 9f839c3..6532939 100644
--- a/arch/mips/au1000/xxs1500/init.c
+++ b/arch/mips/au1000/xxs1500/init.c
@@ -30,15 +30,13 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
#include <linux/string.h>
#include <linux/kernel.h>
-int prom_argc;
-char **prom_argv, **prom_envp;
-extern void __init prom_init_cmdline(void);
-extern char *prom_getenv(char *envname);
+#include <asm/addrspace.h>
+#include <asm/bootinfo.h>
+
+#include <prom.h>
const char *get_system_type(void)
{
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 1245b2f..095988f 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -77,12 +77,7 @@
{
pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
- dcr_write(msic->dcr_host, msic->dcr_host.base + dcr_n, val);
-}
-
-static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n)
-{
- return dcr_read(msic->dcr_host, msic->dcr_host.base + dcr_n);
+ dcr_write(msic->dcr_host, dcr_n, val);
}
static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
@@ -91,7 +86,7 @@
u32 write_offset, msi;
int idx;
- write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG);
+ write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
/* write_offset doesn't wrap properly, so we have to mask it */
@@ -306,7 +301,7 @@
list_for_each_entry(msic, &axon_msic_list, list) {
pr_debug("axon_msi: disabling %s\n",
msic->irq_host->of_node->full_name);
- tmp = msic_dcr_read(msic, MSIC_CTRL_REG);
+ tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
}
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
index ab11c0b..427027c 100644
--- a/arch/powerpc/sysdev/dcr.c
+++ b/arch/powerpc/sysdev/dcr.c
@@ -126,13 +126,13 @@
}
EXPORT_SYMBOL_GPL(dcr_map);
-void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c)
+void dcr_unmap(dcr_host_t host, unsigned int dcr_c)
{
dcr_host_t h = host;
if (h.token == NULL)
return;
- h.token += dcr_n * h.stride;
+ h.token += host.base * h.stride;
iounmap(h.token);
h.token = NULL;
}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 893e654..e479388 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -156,7 +156,7 @@
switch(type) {
#ifdef CONFIG_PPC_DCR
case mpic_access_dcr:
- return dcr_read(rb->dhost, rb->dhost.base + reg);
+ return dcr_read(rb->dhost, reg);
#endif
case mpic_access_mmio_be:
return in_be32(rb->base + (reg >> 2));
@@ -173,7 +173,7 @@
switch(type) {
#ifdef CONFIG_PPC_DCR
case mpic_access_dcr:
- return dcr_write(rb->dhost, rb->dhost.base + reg, value);
+ return dcr_write(rb->dhost, reg, value);
#endif
case mpic_access_mmio_be:
return out_be32(rb->base + (reg >> 2), value);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f3bceb1..139ca15 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -68,9 +68,15 @@
l %r1,BASED(.Ltrace_irq_off)
basr %r14,%r1
.endm
+
+ .macro LOCKDEP_SYS_EXIT
+ l %r1,BASED(.Llockdep_sys_exit)
+ basr %r14,%r1
+ .endm
#else
#define TRACE_IRQS_ON
#define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
#endif
/*
@@ -260,6 +266,7 @@
bno BASED(sysc_leave)
tm __TI_flags+3(%r9),_TIF_WORK_SVC
bnz BASED(sysc_work) # there is work to do (signals etc.)
+ LOCKDEP_SYS_EXIT
sysc_leave:
RESTORE_ALL __LC_RETURN_PSW,1
@@ -283,6 +290,7 @@
bo BASED(sysc_restart)
tm __TI_flags+3(%r9),_TIF_SINGLE_STEP
bo BASED(sysc_singlestep)
+ LOCKDEP_SYS_EXIT
b BASED(sysc_leave)
#
@@ -572,6 +580,7 @@
#endif
tm __TI_flags+3(%r9),_TIF_WORK_INT
bnz BASED(io_work) # there is work to do (signals etc.)
+ LOCKDEP_SYS_EXIT
io_leave:
RESTORE_ALL __LC_RETURN_PSW,0
io_done:
@@ -618,6 +627,7 @@
bo BASED(io_reschedule)
tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
bnz BASED(io_sigpending)
+ LOCKDEP_SYS_EXIT
b BASED(io_leave)
#
@@ -1040,6 +1050,8 @@
.Ltrace_irq_on: .long trace_hardirqs_on
.Ltrace_irq_off:
.long trace_hardirqs_off
+.Llockdep_sys_exit:
+ .long lockdep_sys_exit
#endif
.Lcritical_start:
.long __critical_start + 0x80000000
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9c0d5cc..05e26d1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -66,9 +66,14 @@
.macro TRACE_IRQS_OFF
brasl %r14,trace_hardirqs_off
.endm
+
+ .macro LOCKDEP_SYS_EXIT
+ brasl %r14,lockdep_sys_exit
+ .endm
#else
#define TRACE_IRQS_ON
#define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
#endif
.macro STORE_TIMER lc_offset
@@ -255,6 +260,7 @@
jno sysc_leave
tm __TI_flags+7(%r9),_TIF_WORK_SVC
jnz sysc_work # there is work to do (signals etc.)
+ LOCKDEP_SYS_EXIT
sysc_leave:
RESTORE_ALL __LC_RETURN_PSW,1
@@ -278,6 +284,7 @@
jo sysc_restart
tm __TI_flags+7(%r9),_TIF_SINGLE_STEP
jo sysc_singlestep
+ LOCKDEP_SYS_EXIT
j sysc_leave
#
@@ -558,6 +565,7 @@
#endif
tm __TI_flags+7(%r9),_TIF_WORK_INT
jnz io_work # there is work to do (signals etc.)
+ LOCKDEP_SYS_EXIT
io_leave:
RESTORE_ALL __LC_RETURN_PSW,0
io_done:
@@ -605,6 +613,7 @@
jo io_reschedule
tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
jnz io_sigpending
+ LOCKDEP_SYS_EXIT
j io_leave
#
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 290b7bc..8099fea0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -251,6 +251,7 @@
jb resume_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
+ LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
@@ -338,6 +339,7 @@
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
+ LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
@@ -377,6 +379,7 @@
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp) # store the return value
syscall_exit:
+ LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
@@ -467,6 +470,7 @@
jz work_notifysig
work_resched:
call schedule
+ LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1d232e5..f1cacd4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -244,6 +244,7 @@
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
+ LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
cli
TRACE_IRQS_OFF
@@ -333,6 +334,7 @@
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
+ LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
@@ -544,11 +546,13 @@
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
+ LOCKDEP_SYS_EXIT_IRQ
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
-retint_swapgs:
+
+retint_swapgs: /* return to user-space */
/*
* The iretq could re-enable interrupts:
*/
@@ -557,7 +561,7 @@
swapgs
jmp restore_args
-retint_restore_args:
+retint_restore_args: /* return to kernel space */
cli
/*
* The iretq could re-enable interrupts:
@@ -866,26 +870,21 @@
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
- /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
- movl %ebx,%eax
+ /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+error_exit:
+ movl %ebx,%eax
RESTORE_REST
cli
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
+ LOCKDEP_SYS_EXIT_IRQ
movl threadinfo_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
- /*
- * The iret might restore flags:
- */
- TRACE_IRQS_IRETQ
- swapgs
- RESTORE_ARGS 0,8,0
- jmp iret_label
+ jmp retint_swapgs
CFI_ENDPROC
error_kernelspace:
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index c2d03e9..e7d0d3c 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -557,6 +557,12 @@
resume_execution(cur, regs, kcb);
regs->eflags |= kcb->kprobe_saved_eflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+ if (raw_irqs_disabled_flags(regs->eflags))
+ trace_hardirqs_off();
+ else
+ trace_hardirqs_on();
+#endif
/*Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -694,6 +700,7 @@
memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
MIN_STACK_SIZE(addr));
regs->eflags &= ~IF_MASK;
+ trace_hardirqs_off();
regs->eip = (unsigned long)(jp->entry);
return 1;
}
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 1df17a0..62e28e5 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -544,6 +544,12 @@
resume_execution(cur, regs, kcb);
regs->eflags |= kcb->kprobe_saved_rflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+ if (raw_irqs_disabled_flags(regs->eflags))
+ trace_hardirqs_off();
+ else
+ trace_hardirqs_on();
+#endif
/* Restore the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -684,6 +690,7 @@
memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
MIN_STACK_SIZE(addr));
regs->eflags &= ~IF_MASK;
+ trace_hardirqs_off();
regs->rip = (unsigned long)(jp->entry);
return 1;
}
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 55e586d..6ea73f3 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -50,6 +50,10 @@
thunk trace_hardirqs_on_thunk,trace_hardirqs_on
thunk trace_hardirqs_off_thunk,trace_hardirqs_off
#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ thunk lockdep_sys_exit_thunk,lockdep_sys_exit
+#endif
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
CFI_STARTPROC
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 4672066..33f5eb0 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -272,6 +272,15 @@
If unsure, say N.
+config PATA_CS5536
+ tristate "CS5536 PATA support (Experimental)"
+ depends on PCI && X86 && !X86_64 && EXPERIMENTAL
+ help
+ This option enables support for the AMD CS5536
+ companion chip used with the Geode LX processor family.
+
+ If unsure, say N.
+
config PATA_CYPRESS
tristate "Cypress CY82C693 PATA support (Very Experimental)"
depends on PCI && EXPERIMENTAL
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 2a63645..6bdc307 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -28,6 +28,7 @@
obj-$(CONFIG_PATA_CS5520) += pata_cs5520.o
obj-$(CONFIG_PATA_CS5530) += pata_cs5530.o
obj-$(CONFIG_PATA_CS5535) += pata_cs5535.o
+obj-$(CONFIG_PATA_CS5536) += pata_cs5536.o
obj-$(CONFIG_PATA_CYPRESS) += pata_cypress.o
obj-$(CONFIG_PATA_EFAR) += pata_efar.o
obj-$(CONFIG_PATA_HPT366) += pata_hpt366.o
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 9ce4aa9..3c6f43e 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -130,6 +130,7 @@
ich8_sata_ahci = 9,
piix_pata_mwdma = 10, /* PIIX3 MWDMA only */
tolapai_sata_ahci = 11,
+ ich9_2port_sata = 12,
/* constants for mapping table */
P0 = 0, /* port 0 */
@@ -238,19 +239,19 @@
/* SATA Controller 1 IDE (ICH8) */
{ 0x8086, 0x2820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
/* SATA Controller 2 IDE (ICH8) */
- { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+ { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
/* Mobile SATA Controller IDE (ICH8M) */
{ 0x8086, 0x2828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
/* SATA Controller IDE (ICH9) */
{ 0x8086, 0x2920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
/* SATA Controller IDE (ICH9) */
- { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+ { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
/* SATA Controller IDE (ICH9) */
- { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+ { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
/* SATA Controller IDE (ICH9M) */
- { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+ { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
/* SATA Controller IDE (ICH9M) */
- { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
+ { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata },
/* SATA Controller IDE (ICH9M) */
{ 0x8086, 0x292e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
/* SATA Controller IDE (Tolapai) */
@@ -448,6 +449,18 @@
},
};
+static const struct piix_map_db ich9_2port_map_db = {
+ .mask = 0x3,
+ .port_enable = 0x3,
+ .map = {
+ /* PM PS SM SS MAP */
+ { P0, NA, P1, NA }, /* 00b */
+ { RV, RV, RV, RV }, /* 01b */
+ { RV, RV, RV, RV }, /* 10b */
+ { RV, RV, RV, RV },
+ },
+};
+
static const struct piix_map_db *piix_map_db_table[] = {
[ich5_sata] = &ich5_map_db,
[ich6_sata] = &ich6_map_db,
@@ -455,6 +468,7 @@
[ich6m_sata_ahci] = &ich6m_map_db,
[ich8_sata_ahci] = &ich8_map_db,
[tolapai_sata_ahci] = &tolapai_map_db,
+ [ich9_2port_sata] = &ich9_2port_map_db,
};
static struct ata_port_info piix_port_info[] = {
@@ -570,6 +584,17 @@
.udma_mask = ATA_UDMA6,
.port_ops = &piix_sata_ops,
},
+
+ [ich9_2port_sata] =
+ {
+ .sht = &piix_sht,
+ .flags = PIIX_SATA_FLAGS | PIIX_FLAG_SCR |
+ PIIX_FLAG_AHCI,
+ .pio_mask = 0x1f, /* pio0-4 */
+ .mwdma_mask = 0x07, /* mwdma0-2 */
+ .udma_mask = ATA_UDMA6,
+ .port_ops = &piix_sata_ops,
+ },
};
static struct pci_bits piix_enable_bits[] = {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b05384a..68699b3 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3984,6 +3984,7 @@
{ "ST9120822AS", "3.CLF", ATA_HORKAGE_NONCQ, },
{ "ST9160821AS", "3.CLF", ATA_HORKAGE_NONCQ, },
{ "ST9160821AS", "3.ALD", ATA_HORKAGE_NONCQ, },
+ { "ST9160821AS", "3.CCD", ATA_HORKAGE_NONCQ, },
{ "ST3160812AS", "3.ADJ", ATA_HORKAGE_NONCQ, },
{ "ST980813AS", "3.ADB", ATA_HORKAGE_NONCQ, },
{ "SAMSUNG HD401LJ", "ZZ100-15", ATA_HORKAGE_NONCQ, },
@@ -4013,8 +4014,14 @@
p = strchr(patt, wildchar);
if (p && ((*(p + 1)) == 0))
len = p - patt;
- else
+ else {
len = strlen(name);
+ if (!len) {
+ if (!*patt)
+ return 0;
+ return -1;
+ }
+ }
return strncmp(patt, name, len);
}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index ea53e6a..d63c81e 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1363,6 +1363,7 @@
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
+ struct ata_eh_info *ehi = &qc->dev->link->eh_info;
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
int need_sense = (qc->err_mask != 0);
@@ -1376,14 +1377,14 @@
case ATA_CMD_SET_FEATURES:
if ((qc->tf.feature == SETFEATURES_WC_ON) ||
(qc->tf.feature == SETFEATURES_WC_OFF)) {
- ap->link.eh_info.action |= ATA_EH_REVALIDATE;
+ ehi->action |= ATA_EH_REVALIDATE;
ata_port_schedule_eh(ap);
}
break;
case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
case ATA_CMD_SET_MULTI: /* multi_count changed */
- ap->link.eh_info.action |= ATA_EH_REVALIDATE;
+ ehi->action |= ATA_EH_REVALIDATE;
ata_port_schedule_eh(ap);
break;
}
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
new file mode 100644
index 0000000..53070f6
--- /dev/null
+++ b/drivers/ata/pata_cs5536.c
@@ -0,0 +1,344 @@
+/*
+ * pata_cs5536.c - CS5536 PATA for new ATA layer
+ * (C) 2007 Martin K. Petersen <mkp@mkp.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Documentation:
+ * Available from AMD web site.
+ *
+ * The IDE timing registers for the CS5536 live in the Geode Machine
+ * Specific Register file and not PCI config space. Most BIOSes
+ * virtualize the PCI registers so the chip looks like a standard IDE
+ * controller. Unfortunately not all implementations get this right.
+ * In particular some have problems with unaligned accesses to the
+ * virtualized PCI registers. This driver always does full dword
+ * writes to work around the issue. Also, in case of a bad BIOS this
+ * driver can be loaded with the "msr=1" parameter which forces using
+ * the Machine Specific Registers to configure the device.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/libata.h>
+#include <scsi/scsi_host.h>
+#include <asm/msr.h>
+
+#define DRV_NAME "pata_cs5536"
+#define DRV_VERSION "0.0.5"
+
+enum {
+ CFG = 0,
+ DTC = 1,
+ CAST = 2,
+ ETC = 3,
+
+ MSR_IDE_BASE = 0x51300000,
+ MSR_IDE_CFG = (MSR_IDE_BASE + 0x10),
+ MSR_IDE_DTC = (MSR_IDE_BASE + 0x12),
+ MSR_IDE_CAST = (MSR_IDE_BASE + 0x13),
+ MSR_IDE_ETC = (MSR_IDE_BASE + 0x14),
+
+ PCI_IDE_CFG = 0x40,
+ PCI_IDE_DTC = 0x48,
+ PCI_IDE_CAST = 0x4c,
+ PCI_IDE_ETC = 0x50,
+
+ IDE_CFG_CHANEN = 0x2,
+ IDE_CFG_CABLE = 0x10000,
+
+ IDE_D0_SHIFT = 24,
+ IDE_D1_SHIFT = 16,
+ IDE_DRV_MASK = 0xff,
+
+ IDE_CAST_D0_SHIFT = 6,
+ IDE_CAST_D1_SHIFT = 4,
+ IDE_CAST_DRV_MASK = 0x3,
+ IDE_CAST_CMD_MASK = 0xff,
+ IDE_CAST_CMD_SHIFT = 24,
+
+ IDE_ETC_NODMA = 0x03,
+};
+
+static int use_msr;
+
+static const u32 msr_reg[4] = {
+ MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC,
+};
+
+static const u8 pci_reg[4] = {
+ PCI_IDE_CFG, PCI_IDE_DTC, PCI_IDE_CAST, PCI_IDE_ETC,
+};
+
+static inline int cs5536_read(struct pci_dev *pdev, int reg, int *val)
+{
+ if (unlikely(use_msr)) {
+ u32 dummy;
+
+ rdmsr(msr_reg[reg], *val, dummy);
+ return 0;
+ }
+
+ return pci_read_config_dword(pdev, pci_reg[reg], val);
+}
+
+static inline int cs5536_write(struct pci_dev *pdev, int reg, int val)
+{
+ if (unlikely(use_msr)) {
+ wrmsr(msr_reg[reg], val, 0);
+ return 0;
+ }
+
+ return pci_write_config_dword(pdev, pci_reg[reg], val);
+}
+
+/**
+ * cs5536_cable_detect - detect cable type
+ * @ap: Port to detect on
+ * @deadline: deadline jiffies for the operation
+ *
+ * Perform cable detection for ATA66 capable cable. Return a libata
+ * cable type.
+ */
+
+static int cs5536_cable_detect(struct ata_port *ap)
+{
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ u32 cfg;
+
+ cs5536_read(pdev, CFG, &cfg);
+
+ if (cfg & (IDE_CFG_CABLE << ap->port_no))
+ return ATA_CBL_PATA80;
+ else
+ return ATA_CBL_PATA40;
+}
+
+/**
+ * cs5536_set_piomode - PIO setup
+ * @ap: ATA interface
+ * @adev: device on the interface
+ */
+
+static void cs5536_set_piomode(struct ata_port *ap, struct ata_device *adev)
+{
+ static const u8 drv_timings[5] = {
+ 0x98, 0x55, 0x32, 0x21, 0x20,
+ };
+
+ static const u8 addr_timings[5] = {
+ 0x2, 0x1, 0x0, 0x0, 0x0,
+ };
+
+ static const u8 cmd_timings[5] = {
+ 0x99, 0x92, 0x90, 0x22, 0x20,
+ };
+
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ struct ata_device *pair = ata_dev_pair(adev);
+ int mode = adev->pio_mode - XFER_PIO_0;
+ int cmdmode = mode;
+ int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT;
+ int cshift = ap->port_no ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT;
+ u32 dtc, cast, etc;
+
+ if (pair)
+ cmdmode = min(mode, pair->pio_mode - XFER_PIO_0);
+
+ cs5536_read(pdev, DTC, &dtc);
+ cs5536_read(pdev, CAST, &cast);
+ cs5536_read(pdev, ETC, &etc);
+
+ dtc &= ~(IDE_DRV_MASK << dshift);
+ dtc |= drv_timings[mode] << dshift;
+
+ cast &= ~(IDE_CAST_DRV_MASK << cshift);
+ cast |= addr_timings[mode] << cshift;
+
+ cast &= ~(IDE_CAST_CMD_MASK << IDE_CAST_CMD_SHIFT);
+ cast |= cmd_timings[cmdmode] << IDE_CAST_CMD_SHIFT;
+
+ etc &= ~(IDE_DRV_MASK << dshift);
+ etc |= IDE_ETC_NODMA << dshift;
+
+ cs5536_write(pdev, DTC, dtc);
+ cs5536_write(pdev, CAST, cast);
+ cs5536_write(pdev, ETC, etc);
+}
+
+/**
+ * cs5536_set_dmamode - DMA timing setup
+ * @ap: ATA interface
+ * @adev: Device being configured
+ *
+ */
+
+static void cs5536_set_dmamode(struct ata_port *ap, struct ata_device *adev)
+{
+ static const u8 udma_timings[6] = {
+ 0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6,
+ };
+
+ static const u8 mwdma_timings[3] = {
+ 0x67, 0x21, 0x20,
+ };
+
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ u32 dtc, etc;
+ int mode = adev->dma_mode;
+ int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT;
+
+ if (mode >= XFER_UDMA_0) {
+ cs5536_read(pdev, ETC, &etc);
+
+ etc &= ~(IDE_DRV_MASK << dshift);
+ etc |= udma_timings[mode - XFER_UDMA_0] << dshift;
+
+ cs5536_write(pdev, ETC, etc);
+ } else { /* MWDMA */
+ cs5536_read(pdev, DTC, &dtc);
+
+ dtc &= ~(IDE_DRV_MASK << dshift);
+ dtc |= mwdma_timings[mode] << dshift;
+
+ cs5536_write(pdev, DTC, dtc);
+ }
+}
+
+static struct scsi_host_template cs5536_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
+ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .can_queue = ATA_DEF_QUEUE,
+ .this_id = ATA_SHT_THIS_ID,
+ .sg_tablesize = LIBATA_MAX_PRD,
+ .cmd_per_lun = ATA_SHT_CMD_PER_LUN,
+ .emulated = ATA_SHT_EMULATED,
+ .use_clustering = ATA_SHT_USE_CLUSTERING,
+ .proc_name = DRV_NAME,
+ .dma_boundary = ATA_DMA_BOUNDARY,
+ .slave_configure = ata_scsi_slave_config,
+ .slave_destroy = ata_scsi_slave_destroy,
+ .bios_param = ata_std_bios_param,
+};
+
+static struct ata_port_operations cs5536_port_ops = {
+ .set_piomode = cs5536_set_piomode,
+ .set_dmamode = cs5536_set_dmamode,
+ .mode_filter = ata_pci_default_filter,
+
+ .tf_load = ata_tf_load,
+ .tf_read = ata_tf_read,
+ .check_status = ata_check_status,
+ .exec_command = ata_exec_command,
+ .dev_select = ata_std_dev_select,
+
+ .freeze = ata_bmdma_freeze,
+ .thaw = ata_bmdma_thaw,
+ .error_handler = ata_bmdma_error_handler,
+ .post_internal_cmd = ata_bmdma_post_internal_cmd,
+ .cable_detect = cs5536_cable_detect,
+
+ .bmdma_setup = ata_bmdma_setup,
+ .bmdma_start = ata_bmdma_start,
+ .bmdma_stop = ata_bmdma_stop,
+ .bmdma_status = ata_bmdma_status,
+
+ .qc_prep = ata_qc_prep,
+ .qc_issue = ata_qc_issue_prot,
+
+ .data_xfer = ata_data_xfer,
+
+ .irq_handler = ata_interrupt,
+ .irq_clear = ata_bmdma_irq_clear,
+ .irq_on = ata_irq_on,
+
+ .port_start = ata_port_start,
+};
+
+/**
+ * cs5536_init_one
+ * @dev: PCI device
+ * @id: Entry in match table
+ *
+ */
+
+static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ static const struct ata_port_info info = {
+ .sht = &cs5536_sht,
+ .flags = ATA_FLAG_SLAVE_POSS,
+ .pio_mask = 0x1f,
+ .mwdma_mask = 0x07,
+ .udma_mask = ATA_UDMA5,
+ .port_ops = &cs5536_port_ops,
+ };
+
+ const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info };
+ u32 cfg;
+
+ if (use_msr)
+ printk(KERN_ERR DRV_NAME ": Using MSR regs instead of PCI\n");
+
+ cs5536_read(dev, CFG, &cfg);
+
+ if ((cfg & IDE_CFG_CHANEN) == 0) {
+ printk(KERN_ERR DRV_NAME ": disabled by BIOS\n");
+ return -ENODEV;
+ }
+
+ return ata_pci_init_one(dev, ppi);
+}
+
+static const struct pci_device_id cs5536[] = {
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_IDE), },
+ { },
+};
+
+static struct pci_driver cs5536_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = cs5536,
+ .probe = cs5536_init_one,
+ .remove = ata_pci_remove_one,
+#ifdef CONFIG_PM
+ .suspend = ata_pci_device_suspend,
+ .resume = ata_pci_device_resume,
+#endif
+};
+
+static int __init cs5536_init(void)
+{
+ return pci_register_driver(&cs5536_pci_driver);
+}
+
+static void __exit cs5536_exit(void)
+{
+ pci_unregister_driver(&cs5536_pci_driver);
+}
+
+MODULE_AUTHOR("Martin K. Petersen");
+MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cs5536);
+MODULE_VERSION(DRV_VERSION);
+module_param_named(msr, use_msr, int, 0644);
+MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
+
+module_init(cs5536_init);
+module_exit(cs5536_exit);
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 782ff4a..5db2013 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -353,6 +353,7 @@
static struct pcmcia_device_id pcmcia_devices[] = {
PCMCIA_DEVICE_FUNC_ID(4),
+ PCMCIA_DEVICE_MANF_CARD(0x0000, 0x0000), /* Corsair */
PCMCIA_DEVICE_MANF_CARD(0x0007, 0x0000), /* Hitachi */
PCMCIA_DEVICE_MANF_CARD(0x000a, 0x0000), /* I-O Data CFA */
PCMCIA_DEVICE_MANF_CARD(0x001c, 0x0001), /* Mitsubishi CFA */
@@ -378,6 +379,7 @@
PCMCIA_DEVICE_PROD_ID12("EXP ", "CD-ROM", 0x0a5c52fd, 0x66536591),
PCMCIA_DEVICE_PROD_ID12("EXP ", "PnPIDE", 0x0a5c52fd, 0x0c694728),
PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e),
+ PCMCIA_DEVICE_PROD_ID12("Hyperstone", "Model1", 0x3d5b9ef5, 0xca6ab420),
PCMCIA_DEVICE_PROD_ID12("HITACHI", "FLASH", 0xf4f43949, 0x9eb86aae),
PCMCIA_DEVICE_PROD_ID12("HITACHI", "microdrive", 0xf4f43949, 0xa6d76178),
PCMCIA_DEVICE_PROD_ID12("IBM", "microdrive", 0xb569a6e5, 0xa6d76178),
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 2eb75cd..4dc2e73 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -279,7 +279,7 @@
* Returns the final clock settings.
*/
-static u8 sil680_init_chip(struct pci_dev *pdev)
+static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio)
{
u32 class_rev = 0;
u8 tmpbyte = 0;
@@ -297,6 +297,8 @@
dev_dbg(&pdev->dev, "sil680: BA5_EN = %d clock = %02X\n",
tmpbyte & 1, tmpbyte & 0x30);
+ *try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5);
+
switch(tmpbyte & 0x30) {
case 0x00:
/* 133 clock attempt to force it on */
@@ -361,25 +363,76 @@
};
const struct ata_port_info *ppi[] = { &info, NULL };
static int printed_version;
+ struct ata_host *host;
+ void __iomem *mmio_base;
+ int rc, try_mmio;
if (!printed_version++)
dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n");
- switch(sil680_init_chip(pdev))
- {
+ switch (sil680_init_chip(pdev, &try_mmio)) {
case 0:
ppi[0] = &info_slow;
break;
case 0x30:
return -ENODEV;
}
+
+ if (!try_mmio)
+ goto use_ioports;
+
+ /* Try to acquire MMIO resources and fallback to PIO if
+ * that fails
+ */
+ rc = pcim_enable_device(pdev);
+ if (rc)
+ return rc;
+ rc = pcim_iomap_regions(pdev, 1 << SIL680_MMIO_BAR, DRV_NAME);
+ if (rc)
+ goto use_ioports;
+
+ /* Allocate host and set it up */
+ host = ata_host_alloc_pinfo(&pdev->dev, ppi, 2);
+ if (!host)
+ return -ENOMEM;
+ host->iomap = pcim_iomap_table(pdev);
+
+ /* Setup DMA masks */
+ rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+ return rc;
+ rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+ if (rc)
+ return rc;
+ pci_set_master(pdev);
+
+ /* Get MMIO base and initialize port addresses */
+ mmio_base = host->iomap[SIL680_MMIO_BAR];
+ host->ports[0]->ioaddr.bmdma_addr = mmio_base + 0x00;
+ host->ports[0]->ioaddr.cmd_addr = mmio_base + 0x80;
+ host->ports[0]->ioaddr.ctl_addr = mmio_base + 0x8a;
+ host->ports[0]->ioaddr.altstatus_addr = mmio_base + 0x8a;
+ ata_std_ports(&host->ports[0]->ioaddr);
+ host->ports[1]->ioaddr.bmdma_addr = mmio_base + 0x08;
+ host->ports[1]->ioaddr.cmd_addr = mmio_base + 0xc0;
+ host->ports[1]->ioaddr.ctl_addr = mmio_base + 0xca;
+ host->ports[1]->ioaddr.altstatus_addr = mmio_base + 0xca;
+ ata_std_ports(&host->ports[1]->ioaddr);
+
+ /* Register & activate */
+ return ata_host_activate(host, pdev->irq, ata_interrupt, IRQF_SHARED,
+ &sil680_sht);
+
+use_ioports:
return ata_pci_init_one(pdev, ppi);
}
#ifdef CONFIG_PM
static int sil680_reinit_one(struct pci_dev *pdev)
{
- sil680_init_chip(pdev);
+ int try_mmio;
+
+ sil680_init_chip(pdev, &try_mmio);
return ata_pci_device_resume(pdev);
}
#endif
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 40557fe2..240a892 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -169,6 +169,35 @@
NV_ADMA_PORT_REGISTER_MODE = (1 << 0),
NV_ADMA_ATAPI_SETUP_COMPLETE = (1 << 1),
+ /* MCP55 reg offset */
+ NV_CTL_MCP55 = 0x400,
+ NV_INT_STATUS_MCP55 = 0x440,
+ NV_INT_ENABLE_MCP55 = 0x444,
+ NV_NCQ_REG_MCP55 = 0x448,
+
+ /* MCP55 */
+ NV_INT_ALL_MCP55 = 0xffff,
+ NV_INT_PORT_SHIFT_MCP55 = 16, /* each port occupies 16 bits */
+ NV_INT_MASK_MCP55 = NV_INT_ALL_MCP55 & 0xfffd,
+
+ /* SWNCQ ENABLE BITS*/
+ NV_CTL_PRI_SWNCQ = 0x02,
+ NV_CTL_SEC_SWNCQ = 0x04,
+
+ /* SW NCQ status bits*/
+ NV_SWNCQ_IRQ_DEV = (1 << 0),
+ NV_SWNCQ_IRQ_PM = (1 << 1),
+ NV_SWNCQ_IRQ_ADDED = (1 << 2),
+ NV_SWNCQ_IRQ_REMOVED = (1 << 3),
+
+ NV_SWNCQ_IRQ_BACKOUT = (1 << 4),
+ NV_SWNCQ_IRQ_SDBFIS = (1 << 5),
+ NV_SWNCQ_IRQ_DHREGFIS = (1 << 6),
+ NV_SWNCQ_IRQ_DMASETUP = (1 << 7),
+
+ NV_SWNCQ_IRQ_HOTPLUG = NV_SWNCQ_IRQ_ADDED |
+ NV_SWNCQ_IRQ_REMOVED,
+
};
/* ADMA Physical Region Descriptor - one SG segment */
@@ -226,6 +255,42 @@
unsigned long type;
};
+struct defer_queue {
+ u32 defer_bits;
+ unsigned int head;
+ unsigned int tail;
+ unsigned int tag[ATA_MAX_QUEUE];
+};
+
+enum ncq_saw_flag_list {
+ ncq_saw_d2h = (1U << 0),
+ ncq_saw_dmas = (1U << 1),
+ ncq_saw_sdb = (1U << 2),
+ ncq_saw_backout = (1U << 3),
+};
+
+struct nv_swncq_port_priv {
+ struct ata_prd *prd; /* our SG list */
+ dma_addr_t prd_dma; /* and its DMA mapping */
+ void __iomem *sactive_block;
+ void __iomem *irq_block;
+ void __iomem *tag_block;
+ u32 qc_active;
+
+ unsigned int last_issue_tag;
+
+ /* fifo circular queue to store deferral command */
+ struct defer_queue defer_queue;
+
+ /* for NCQ interrupt analysis */
+ u32 dhfis_bits;
+ u32 dmafis_bits;
+ u32 sdbfis_bits;
+
+ unsigned int ncq_flags;
+};
+
+
#define NV_ADMA_CHECK_INTR(GCTL, PORT) ((GCTL) & ( 1 << (19 + (12 * (PORT)))))
static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
@@ -263,13 +328,29 @@
static void nv_adma_post_internal_cmd(struct ata_queued_cmd *qc);
static void nv_adma_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
+static void nv_mcp55_thaw(struct ata_port *ap);
+static void nv_mcp55_freeze(struct ata_port *ap);
+static void nv_swncq_error_handler(struct ata_port *ap);
+static int nv_swncq_slave_config(struct scsi_device *sdev);
+static int nv_swncq_port_start(struct ata_port *ap);
+static void nv_swncq_qc_prep(struct ata_queued_cmd *qc);
+static void nv_swncq_fill_sg(struct ata_queued_cmd *qc);
+static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc);
+static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis);
+static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance);
+#ifdef CONFIG_PM
+static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg);
+static int nv_swncq_port_resume(struct ata_port *ap);
+#endif
+
enum nv_host_type
{
GENERIC,
NFORCE2,
NFORCE3 = NFORCE2, /* NF2 == NF3 as far as sata_nv is concerned */
CK804,
- ADMA
+ ADMA,
+ SWNCQ,
};
static const struct pci_device_id nv_pci_tbl[] = {
@@ -280,13 +361,13 @@
{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2), CK804 },
{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA), CK804 },
{ PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2), CK804 },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), GENERIC },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), GENERIC },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), GENERIC },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), GENERIC },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), GENERIC },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), GENERIC },
- { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), GENERIC },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), SWNCQ },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), SWNCQ },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), SWNCQ },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), SWNCQ },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), SWNCQ },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), SWNCQ },
+ { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), SWNCQ },
{ } /* terminate list */
};
@@ -339,6 +420,25 @@
.bios_param = ata_std_bios_param,
};
+static struct scsi_host_template nv_swncq_sht = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
+ .ioctl = ata_scsi_ioctl,
+ .queuecommand = ata_scsi_queuecmd,
+ .change_queue_depth = ata_scsi_change_queue_depth,
+ .can_queue = ATA_MAX_QUEUE,
+ .this_id = ATA_SHT_THIS_ID,
+ .sg_tablesize = LIBATA_MAX_PRD,
+ .cmd_per_lun = ATA_SHT_CMD_PER_LUN,
+ .emulated = ATA_SHT_EMULATED,
+ .use_clustering = ATA_SHT_USE_CLUSTERING,
+ .proc_name = DRV_NAME,
+ .dma_boundary = ATA_DMA_BOUNDARY,
+ .slave_configure = nv_swncq_slave_config,
+ .slave_destroy = ata_scsi_slave_destroy,
+ .bios_param = ata_std_bios_param,
+};
+
static const struct ata_port_operations nv_generic_ops = {
.tf_load = ata_tf_load,
.tf_read = ata_tf_read,
@@ -444,6 +544,35 @@
.host_stop = nv_adma_host_stop,
};
+static const struct ata_port_operations nv_swncq_ops = {
+ .tf_load = ata_tf_load,
+ .tf_read = ata_tf_read,
+ .exec_command = ata_exec_command,
+ .check_status = ata_check_status,
+ .dev_select = ata_std_dev_select,
+ .bmdma_setup = ata_bmdma_setup,
+ .bmdma_start = ata_bmdma_start,
+ .bmdma_stop = ata_bmdma_stop,
+ .bmdma_status = ata_bmdma_status,
+ .qc_defer = ata_std_qc_defer,
+ .qc_prep = nv_swncq_qc_prep,
+ .qc_issue = nv_swncq_qc_issue,
+ .freeze = nv_mcp55_freeze,
+ .thaw = nv_mcp55_thaw,
+ .error_handler = nv_swncq_error_handler,
+ .post_internal_cmd = ata_bmdma_post_internal_cmd,
+ .data_xfer = ata_data_xfer,
+ .irq_clear = ata_bmdma_irq_clear,
+ .irq_on = ata_irq_on,
+ .scr_read = nv_scr_read,
+ .scr_write = nv_scr_write,
+#ifdef CONFIG_PM
+ .port_suspend = nv_swncq_port_suspend,
+ .port_resume = nv_swncq_port_resume,
+#endif
+ .port_start = nv_swncq_port_start,
+};
+
static const struct ata_port_info nv_port_info[] = {
/* generic */
{
@@ -490,6 +619,18 @@
.port_ops = &nv_adma_ops,
.irq_handler = nv_adma_interrupt,
},
+ /* SWNCQ */
+ {
+ .sht = &nv_swncq_sht,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
+ ATA_FLAG_NCQ,
+ .link_flags = ATA_LFLAG_HRST_TO_RESUME,
+ .pio_mask = NV_PIO_MASK,
+ .mwdma_mask = NV_MWDMA_MASK,
+ .udma_mask = NV_UDMA_MASK,
+ .port_ops = &nv_swncq_ops,
+ .irq_handler = nv_swncq_interrupt,
+ },
};
MODULE_AUTHOR("NVIDIA");
@@ -499,6 +640,7 @@
MODULE_VERSION(DRV_VERSION);
static int adma_enabled = 1;
+static int swncq_enabled;
static void nv_adma_register_mode(struct ata_port *ap)
{
@@ -1452,6 +1594,34 @@
writeb(mask, mmio_base + NV_INT_ENABLE_CK804);
}
+static void nv_mcp55_freeze(struct ata_port *ap)
+{
+ void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR];
+ int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55;
+ u32 mask;
+
+ writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55);
+
+ mask = readl(mmio_base + NV_INT_ENABLE_MCP55);
+ mask &= ~(NV_INT_ALL_MCP55 << shift);
+ writel(mask, mmio_base + NV_INT_ENABLE_MCP55);
+ ata_bmdma_freeze(ap);
+}
+
+static void nv_mcp55_thaw(struct ata_port *ap)
+{
+ void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR];
+ int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55;
+ u32 mask;
+
+ writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55);
+
+ mask = readl(mmio_base + NV_INT_ENABLE_MCP55);
+ mask |= (NV_INT_MASK_MCP55 << shift);
+ writel(mask, mmio_base + NV_INT_ENABLE_MCP55);
+ ata_bmdma_thaw(ap);
+}
+
static int nv_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline)
{
@@ -1525,6 +1695,663 @@
nv_hardreset, ata_std_postreset);
}
+static void nv_swncq_qc_to_dq(struct ata_port *ap, struct ata_queued_cmd *qc)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ struct defer_queue *dq = &pp->defer_queue;
+
+ /* queue is full */
+ WARN_ON(dq->tail - dq->head == ATA_MAX_QUEUE);
+ dq->defer_bits |= (1 << qc->tag);
+ dq->tag[dq->tail++ & (ATA_MAX_QUEUE - 1)] = qc->tag;
+}
+
+static struct ata_queued_cmd *nv_swncq_qc_from_dq(struct ata_port *ap)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ struct defer_queue *dq = &pp->defer_queue;
+ unsigned int tag;
+
+ if (dq->head == dq->tail) /* null queue */
+ return NULL;
+
+ tag = dq->tag[dq->head & (ATA_MAX_QUEUE - 1)];
+ dq->tag[dq->head++ & (ATA_MAX_QUEUE - 1)] = ATA_TAG_POISON;
+ WARN_ON(!(dq->defer_bits & (1 << tag)));
+ dq->defer_bits &= ~(1 << tag);
+
+ return ata_qc_from_tag(ap, tag);
+}
+
+static void nv_swncq_fis_reinit(struct ata_port *ap)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+
+ pp->dhfis_bits = 0;
+ pp->dmafis_bits = 0;
+ pp->sdbfis_bits = 0;
+ pp->ncq_flags = 0;
+}
+
+static void nv_swncq_pp_reinit(struct ata_port *ap)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ struct defer_queue *dq = &pp->defer_queue;
+
+ dq->head = 0;
+ dq->tail = 0;
+ dq->defer_bits = 0;
+ pp->qc_active = 0;
+ pp->last_issue_tag = ATA_TAG_POISON;
+ nv_swncq_fis_reinit(ap);
+}
+
+static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+
+ writew(fis, pp->irq_block);
+}
+
+static void __ata_bmdma_stop(struct ata_port *ap)
+{
+ struct ata_queued_cmd qc;
+
+ qc.ap = ap;
+ ata_bmdma_stop(&qc);
+}
+
+static void nv_swncq_ncq_stop(struct ata_port *ap)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ unsigned int i;
+ u32 sactive;
+ u32 done_mask;
+
+ ata_port_printk(ap, KERN_ERR,
+ "EH in SWNCQ mode,QC:qc_active 0x%X sactive 0x%X\n",
+ ap->qc_active, ap->link.sactive);
+ ata_port_printk(ap, KERN_ERR,
+ "SWNCQ:qc_active 0x%X defer_bits 0x%X last_issue_tag 0x%x\n "
+ "dhfis 0x%X dmafis 0x%X sdbfis 0x%X\n",
+ pp->qc_active, pp->defer_queue.defer_bits, pp->last_issue_tag,
+ pp->dhfis_bits, pp->dmafis_bits, pp->sdbfis_bits);
+
+ ata_port_printk(ap, KERN_ERR, "ATA_REG 0x%X ERR_REG 0x%X\n",
+ ap->ops->check_status(ap),
+ ioread8(ap->ioaddr.error_addr));
+
+ sactive = readl(pp->sactive_block);
+ done_mask = pp->qc_active ^ sactive;
+
+ ata_port_printk(ap, KERN_ERR, "tag : dhfis dmafis sdbfis sacitve\n");
+ for (i = 0; i < ATA_MAX_QUEUE; i++) {
+ u8 err = 0;
+ if (pp->qc_active & (1 << i))
+ err = 0;
+ else if (done_mask & (1 << i))
+ err = 1;
+ else
+ continue;
+
+ ata_port_printk(ap, KERN_ERR,
+ "tag 0x%x: %01x %01x %01x %01x %s\n", i,
+ (pp->dhfis_bits >> i) & 0x1,
+ (pp->dmafis_bits >> i) & 0x1,
+ (pp->sdbfis_bits >> i) & 0x1,
+ (sactive >> i) & 0x1,
+ (err ? "error! tag doesn't exit" : " "));
+ }
+
+ nv_swncq_pp_reinit(ap);
+ ap->ops->irq_clear(ap);
+ __ata_bmdma_stop(ap);
+ nv_swncq_irq_clear(ap, 0xffff);
+}
+
+static void nv_swncq_error_handler(struct ata_port *ap)
+{
+ struct ata_eh_context *ehc = &ap->link.eh_context;
+
+ if (ap->link.sactive) {
+ nv_swncq_ncq_stop(ap);
+ ehc->i.action |= ATA_EH_HARDRESET;
+ }
+
+ ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset,
+ nv_hardreset, ata_std_postreset);
+}
+
+#ifdef CONFIG_PM
+static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg)
+{
+ void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
+ u32 tmp;
+
+ /* clear irq */
+ writel(~0, mmio + NV_INT_STATUS_MCP55);
+
+ /* disable irq */
+ writel(0, mmio + NV_INT_ENABLE_MCP55);
+
+ /* disable swncq */
+ tmp = readl(mmio + NV_CTL_MCP55);
+ tmp &= ~(NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ);
+ writel(tmp, mmio + NV_CTL_MCP55);
+
+ return 0;
+}
+
+static int nv_swncq_port_resume(struct ata_port *ap)
+{
+ void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
+ u32 tmp;
+
+ /* clear irq */
+ writel(~0, mmio + NV_INT_STATUS_MCP55);
+
+ /* enable irq */
+ writel(0x00fd00fd, mmio + NV_INT_ENABLE_MCP55);
+
+ /* enable swncq */
+ tmp = readl(mmio + NV_CTL_MCP55);
+ writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55);
+
+ return 0;
+}
+#endif
+
+static void nv_swncq_host_init(struct ata_host *host)
+{
+ u32 tmp;
+ void __iomem *mmio = host->iomap[NV_MMIO_BAR];
+ struct pci_dev *pdev = to_pci_dev(host->dev);
+ u8 regval;
+
+ /* disable ECO 398 */
+ pci_read_config_byte(pdev, 0x7f, ®val);
+ regval &= ~(1 << 7);
+ pci_write_config_byte(pdev, 0x7f, regval);
+
+ /* enable swncq */
+ tmp = readl(mmio + NV_CTL_MCP55);
+ VPRINTK("HOST_CTL:0x%X\n", tmp);
+ writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55);
+
+ /* enable irq intr */
+ tmp = readl(mmio + NV_INT_ENABLE_MCP55);
+ VPRINTK("HOST_ENABLE:0x%X\n", tmp);
+ writel(tmp | 0x00fd00fd, mmio + NV_INT_ENABLE_MCP55);
+
+ /* clear port irq */
+ writel(~0x0, mmio + NV_INT_STATUS_MCP55);
+}
+
+static int nv_swncq_slave_config(struct scsi_device *sdev)
+{
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ struct ata_device *dev;
+ int rc;
+ u8 rev;
+ u8 check_maxtor = 0;
+ unsigned char model_num[ATA_ID_PROD_LEN + 1];
+
+ rc = ata_scsi_slave_config(sdev);
+ if (sdev->id >= ATA_MAX_DEVICES || sdev->channel || sdev->lun)
+ /* Not a proper libata device, ignore */
+ return rc;
+
+ dev = &ap->link.device[sdev->id];
+ if (!(ap->flags & ATA_FLAG_NCQ) || dev->class == ATA_DEV_ATAPI)
+ return rc;
+
+ /* if MCP51 and Maxtor, then disable ncq */
+ if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA ||
+ pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2)
+ check_maxtor = 1;
+
+ /* if MCP55 and rev <= a2 and Maxtor, then disable ncq */
+ if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA ||
+ pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2) {
+ pci_read_config_byte(pdev, 0x8, &rev);
+ if (rev <= 0xa2)
+ check_maxtor = 1;
+ }
+
+ if (!check_maxtor)
+ return rc;
+
+ ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
+
+ if (strncmp(model_num, "Maxtor", 6) == 0) {
+ ata_scsi_change_queue_depth(sdev, 1);
+ ata_dev_printk(dev, KERN_NOTICE,
+ "Disabling SWNCQ mode (depth %x)\n", sdev->queue_depth);
+ }
+
+ return rc;
+}
+
+static int nv_swncq_port_start(struct ata_port *ap)
+{
+ struct device *dev = ap->host->dev;
+ void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR];
+ struct nv_swncq_port_priv *pp;
+ int rc;
+
+ rc = ata_port_start(ap);
+ if (rc)
+ return rc;
+
+ pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
+ if (!pp)
+ return -ENOMEM;
+
+ pp->prd = dmam_alloc_coherent(dev, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE,
+ &pp->prd_dma, GFP_KERNEL);
+ if (!pp->prd)
+ return -ENOMEM;
+ memset(pp->prd, 0, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE);
+
+ ap->private_data = pp;
+ pp->sactive_block = ap->ioaddr.scr_addr + 4 * SCR_ACTIVE;
+ pp->irq_block = mmio + NV_INT_STATUS_MCP55 + ap->port_no * 2;
+ pp->tag_block = mmio + NV_NCQ_REG_MCP55 + ap->port_no * 2;
+
+ return 0;
+}
+
+static void nv_swncq_qc_prep(struct ata_queued_cmd *qc)
+{
+ if (qc->tf.protocol != ATA_PROT_NCQ) {
+ ata_qc_prep(qc);
+ return;
+ }
+
+ if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+ return;
+
+ nv_swncq_fill_sg(qc);
+}
+
+static void nv_swncq_fill_sg(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ struct scatterlist *sg;
+ unsigned int idx;
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ struct ata_prd *prd;
+
+ WARN_ON(qc->__sg == NULL);
+ WARN_ON(qc->n_elem == 0 && qc->pad_len == 0);
+
+ prd = pp->prd + ATA_MAX_PRD * qc->tag;
+
+ idx = 0;
+ ata_for_each_sg(sg, qc) {
+ u32 addr, offset;
+ u32 sg_len, len;
+
+ addr = (u32)sg_dma_address(sg);
+ sg_len = sg_dma_len(sg);
+
+ while (sg_len) {
+ offset = addr & 0xffff;
+ len = sg_len;
+ if ((offset + sg_len) > 0x10000)
+ len = 0x10000 - offset;
+
+ prd[idx].addr = cpu_to_le32(addr);
+ prd[idx].flags_len = cpu_to_le32(len & 0xffff);
+
+ idx++;
+ sg_len -= len;
+ addr += len;
+ }
+ }
+
+ if (idx)
+ prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+}
+
+static unsigned int nv_swncq_issue_atacmd(struct ata_port *ap,
+ struct ata_queued_cmd *qc)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+
+ if (qc == NULL)
+ return 0;
+
+ DPRINTK("Enter\n");
+
+ writel((1 << qc->tag), pp->sactive_block);
+ pp->last_issue_tag = qc->tag;
+ pp->dhfis_bits &= ~(1 << qc->tag);
+ pp->dmafis_bits &= ~(1 << qc->tag);
+ pp->qc_active |= (0x1 << qc->tag);
+
+ ap->ops->tf_load(ap, &qc->tf); /* load tf registers */
+ ap->ops->exec_command(ap, &qc->tf);
+
+ DPRINTK("Issued tag %u\n", qc->tag);
+
+ return 0;
+}
+
+static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc)
+{
+ struct ata_port *ap = qc->ap;
+ struct nv_swncq_port_priv *pp = ap->private_data;
+
+ if (qc->tf.protocol != ATA_PROT_NCQ)
+ return ata_qc_issue_prot(qc);
+
+ DPRINTK("Enter\n");
+
+ if (!pp->qc_active)
+ nv_swncq_issue_atacmd(ap, qc);
+ else
+ nv_swncq_qc_to_dq(ap, qc); /* add qc to defer queue */
+
+ return 0;
+}
+
+static void nv_swncq_hotplug(struct ata_port *ap, u32 fis)
+{
+ u32 serror;
+ struct ata_eh_info *ehi = &ap->link.eh_info;
+
+ ata_ehi_clear_desc(ehi);
+
+ /* AHCI needs SError cleared; otherwise, it might lock up */
+ sata_scr_read(&ap->link, SCR_ERROR, &serror);
+ sata_scr_write(&ap->link, SCR_ERROR, serror);
+
+ /* analyze @irq_stat */
+ if (fis & NV_SWNCQ_IRQ_ADDED)
+ ata_ehi_push_desc(ehi, "hot plug");
+ else if (fis & NV_SWNCQ_IRQ_REMOVED)
+ ata_ehi_push_desc(ehi, "hot unplug");
+
+ ata_ehi_hotplugged(ehi);
+
+ /* okay, let's hand over to EH */
+ ehi->serror |= serror;
+
+ ata_port_freeze(ap);
+}
+
+static int nv_swncq_sdbfis(struct ata_port *ap)
+{
+ struct ata_queued_cmd *qc;
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ struct ata_eh_info *ehi = &ap->link.eh_info;
+ u32 sactive;
+ int nr_done = 0;
+ u32 done_mask;
+ int i;
+ u8 host_stat;
+ u8 lack_dhfis = 0;
+
+ host_stat = ap->ops->bmdma_status(ap);
+ if (unlikely(host_stat & ATA_DMA_ERR)) {
+ /* error when transfering data to/from memory */
+ ata_ehi_clear_desc(ehi);
+ ata_ehi_push_desc(ehi, "BMDMA stat 0x%x", host_stat);
+ ehi->err_mask |= AC_ERR_HOST_BUS;
+ ehi->action |= ATA_EH_SOFTRESET;
+ return -EINVAL;
+ }
+
+ ap->ops->irq_clear(ap);
+ __ata_bmdma_stop(ap);
+
+ sactive = readl(pp->sactive_block);
+ done_mask = pp->qc_active ^ sactive;
+
+ if (unlikely(done_mask & sactive)) {
+ ata_ehi_clear_desc(ehi);
+ ata_ehi_push_desc(ehi, "illegal SWNCQ:qc_active transition"
+ "(%08x->%08x)", pp->qc_active, sactive);
+ ehi->err_mask |= AC_ERR_HSM;
+ ehi->action |= ATA_EH_HARDRESET;
+ return -EINVAL;
+ }
+ for (i = 0; i < ATA_MAX_QUEUE; i++) {
+ if (!(done_mask & (1 << i)))
+ continue;
+
+ qc = ata_qc_from_tag(ap, i);
+ if (qc) {
+ ata_qc_complete(qc);
+ pp->qc_active &= ~(1 << i);
+ pp->dhfis_bits &= ~(1 << i);
+ pp->dmafis_bits &= ~(1 << i);
+ pp->sdbfis_bits |= (1 << i);
+ nr_done++;
+ }
+ }
+
+ if (!ap->qc_active) {
+ DPRINTK("over\n");
+ nv_swncq_pp_reinit(ap);
+ return nr_done;
+ }
+
+ if (pp->qc_active & pp->dhfis_bits)
+ return nr_done;
+
+ if ((pp->ncq_flags & ncq_saw_backout) ||
+ (pp->qc_active ^ pp->dhfis_bits))
+ /* if the controller cann't get a device to host register FIS,
+ * The driver needs to reissue the new command.
+ */
+ lack_dhfis = 1;
+
+ DPRINTK("id 0x%x QC: qc_active 0x%x,"
+ "SWNCQ:qc_active 0x%X defer_bits %X "
+ "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n",
+ ap->print_id, ap->qc_active, pp->qc_active,
+ pp->defer_queue.defer_bits, pp->dhfis_bits,
+ pp->dmafis_bits, pp->last_issue_tag);
+
+ nv_swncq_fis_reinit(ap);
+
+ if (lack_dhfis) {
+ qc = ata_qc_from_tag(ap, pp->last_issue_tag);
+ nv_swncq_issue_atacmd(ap, qc);
+ return nr_done;
+ }
+
+ if (pp->defer_queue.defer_bits) {
+ /* send deferral queue command */
+ qc = nv_swncq_qc_from_dq(ap);
+ WARN_ON(qc == NULL);
+ nv_swncq_issue_atacmd(ap, qc);
+ }
+
+ return nr_done;
+}
+
+static inline u32 nv_swncq_tag(struct ata_port *ap)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ u32 tag;
+
+ tag = readb(pp->tag_block) >> 2;
+ return (tag & 0x1f);
+}
+
+static int nv_swncq_dmafis(struct ata_port *ap)
+{
+ struct ata_queued_cmd *qc;
+ unsigned int rw;
+ u8 dmactl;
+ u32 tag;
+ struct nv_swncq_port_priv *pp = ap->private_data;
+
+ __ata_bmdma_stop(ap);
+ tag = nv_swncq_tag(ap);
+
+ DPRINTK("dma setup tag 0x%x\n", tag);
+ qc = ata_qc_from_tag(ap, tag);
+
+ if (unlikely(!qc))
+ return 0;
+
+ rw = qc->tf.flags & ATA_TFLAG_WRITE;
+
+ /* load PRD table addr. */
+ iowrite32(pp->prd_dma + ATA_PRD_TBL_SZ * qc->tag,
+ ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
+
+ /* specify data direction, triple-check start bit is clear */
+ dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+ dmactl &= ~ATA_DMA_WR;
+ if (!rw)
+ dmactl |= ATA_DMA_WR;
+
+ iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+
+ return 1;
+}
+
+static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis)
+{
+ struct nv_swncq_port_priv *pp = ap->private_data;
+ struct ata_queued_cmd *qc;
+ struct ata_eh_info *ehi = &ap->link.eh_info;
+ u32 serror;
+ u8 ata_stat;
+ int rc = 0;
+
+ ata_stat = ap->ops->check_status(ap);
+ nv_swncq_irq_clear(ap, fis);
+ if (!fis)
+ return;
+
+ if (ap->pflags & ATA_PFLAG_FROZEN)
+ return;
+
+ if (fis & NV_SWNCQ_IRQ_HOTPLUG) {
+ nv_swncq_hotplug(ap, fis);
+ return;
+ }
+
+ if (!pp->qc_active)
+ return;
+
+ if (ap->ops->scr_read(ap, SCR_ERROR, &serror))
+ return;
+ ap->ops->scr_write(ap, SCR_ERROR, serror);
+
+ if (ata_stat & ATA_ERR) {
+ ata_ehi_clear_desc(ehi);
+ ata_ehi_push_desc(ehi, "Ata error. fis:0x%X", fis);
+ ehi->err_mask |= AC_ERR_DEV;
+ ehi->serror |= serror;
+ ehi->action |= ATA_EH_SOFTRESET;
+ ata_port_freeze(ap);
+ return;
+ }
+
+ if (fis & NV_SWNCQ_IRQ_BACKOUT) {
+ /* If the IRQ is backout, driver must issue
+ * the new command again some time later.
+ */
+ pp->ncq_flags |= ncq_saw_backout;
+ }
+
+ if (fis & NV_SWNCQ_IRQ_SDBFIS) {
+ pp->ncq_flags |= ncq_saw_sdb;
+ DPRINTK("id 0x%x SWNCQ: qc_active 0x%X "
+ "dhfis 0x%X dmafis 0x%X sactive 0x%X\n",
+ ap->print_id, pp->qc_active, pp->dhfis_bits,
+ pp->dmafis_bits, readl(pp->sactive_block));
+ rc = nv_swncq_sdbfis(ap);
+ if (rc < 0)
+ goto irq_error;
+ }
+
+ if (fis & NV_SWNCQ_IRQ_DHREGFIS) {
+ /* The interrupt indicates the new command
+ * was transmitted correctly to the drive.
+ */
+ pp->dhfis_bits |= (0x1 << pp->last_issue_tag);
+ pp->ncq_flags |= ncq_saw_d2h;
+ if (pp->ncq_flags & (ncq_saw_sdb | ncq_saw_backout)) {
+ ata_ehi_push_desc(ehi, "illegal fis transaction");
+ ehi->err_mask |= AC_ERR_HSM;
+ ehi->action |= ATA_EH_HARDRESET;
+ goto irq_error;
+ }
+
+ if (!(fis & NV_SWNCQ_IRQ_DMASETUP) &&
+ !(pp->ncq_flags & ncq_saw_dmas)) {
+ ata_stat = ap->ops->check_status(ap);
+ if (ata_stat & ATA_BUSY)
+ goto irq_exit;
+
+ if (pp->defer_queue.defer_bits) {
+ DPRINTK("send next command\n");
+ qc = nv_swncq_qc_from_dq(ap);
+ nv_swncq_issue_atacmd(ap, qc);
+ }
+ }
+ }
+
+ if (fis & NV_SWNCQ_IRQ_DMASETUP) {
+ /* program the dma controller with appropriate PRD buffers
+ * and start the DMA transfer for requested command.
+ */
+ pp->dmafis_bits |= (0x1 << nv_swncq_tag(ap));
+ pp->ncq_flags |= ncq_saw_dmas;
+ rc = nv_swncq_dmafis(ap);
+ }
+
+irq_exit:
+ return;
+irq_error:
+ ata_ehi_push_desc(ehi, "fis:0x%x", fis);
+ ata_port_freeze(ap);
+ return;
+}
+
+static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance)
+{
+ struct ata_host *host = dev_instance;
+ unsigned int i;
+ unsigned int handled = 0;
+ unsigned long flags;
+ u32 irq_stat;
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ irq_stat = readl(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_MCP55);
+
+ for (i = 0; i < host->n_ports; i++) {
+ struct ata_port *ap = host->ports[i];
+
+ if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
+ if (ap->link.sactive) {
+ nv_swncq_host_interrupt(ap, (u16)irq_stat);
+ handled = 1;
+ } else {
+ if (irq_stat) /* reserve Hotplug */
+ nv_swncq_irq_clear(ap, 0xfff0);
+
+ handled += nv_host_intr(ap, (u8)irq_stat);
+ }
+ }
+ irq_stat >>= NV_INT_PORT_SHIFT_MCP55;
+ }
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ return IRQ_RETVAL(handled);
+}
+
static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
{
static int printed_version = 0;
@@ -1551,7 +2378,7 @@
return rc;
/* determine type and allocate host */
- if (type >= CK804 && adma_enabled) {
+ if (type == CK804 && adma_enabled) {
dev_printk(KERN_NOTICE, &pdev->dev, "Using ADMA mode\n");
type = ADMA;
}
@@ -1597,6 +2424,9 @@
rc = nv_adma_host_init(host);
if (rc)
return rc;
+ } else if (type == SWNCQ && swncq_enabled) {
+ dev_printk(KERN_NOTICE, &pdev->dev, "Using SWNCQ mode\n");
+ nv_swncq_host_init(host);
}
pci_set_master(pdev);
@@ -1696,3 +2526,6 @@
module_exit(nv_exit);
module_param_named(adma, adma_enabled, bool, 0444);
MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: true)");
+module_param_named(swncq, swncq_enabled, bool, 0444);
+MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: false)");
+
diff --git a/drivers/char/ec3104_keyb.c b/drivers/char/ec3104_keyb.c
deleted file mode 100644
index 0200114..0000000
--- a/drivers/char/ec3104_keyb.c
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * linux/drivers/char/ec3104_keyb.c
- *
- * Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>
- *
- * based on linux/drivers/char/pc_keyb.c, which had the following comments:
- *
- * Separation of the PC low-level part by Geert Uytterhoeven, May 1997
- * See keyboard.c for the whole history.
- *
- * Major cleanup by Martin Mares, May 1997
- *
- * Combined the keyboard and PS/2 mouse handling into one file,
- * because they share the same hardware.
- * Johan Myreen <jem@iki.fi> 1998-10-08.
- *
- * Code fixes to handle mouse ACKs properly.
- * C. Scott Ananian <cananian@alumni.princeton.edu> 1999-01-29.
- */
-/* EC3104 note:
- * This code was written without any documentation about the EC3104 chip. While
- * I hope I got most of the basic functionality right, the register names I use
- * are most likely completely different from those in the chip documentation.
- *
- * If you have any further information about the EC3104, please tell me
- * (prumpf@tux.org).
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/tty.h>
-#include <linux/mm.h>
-#include <linux/signal.h>
-#include <linux/init.h>
-#include <linux/kbd_ll.h>
-#include <linux/delay.h>
-#include <linux/random.h>
-#include <linux/poll.h>
-#include <linux/miscdevice.h>
-#include <linux/slab.h>
-#include <linux/kbd_kern.h>
-#include <linux/bitops.h>
-
-#include <asm/keyboard.h>
-#include <asm/uaccess.h>
-#include <asm/irq.h>
-#include <asm/system.h>
-#include <asm/ec3104.h>
-
-#include <asm/io.h>
-
-/* Some configuration switches are present in the include file... */
-
-#include <linux/pc_keyb.h>
-
-#define MSR_CTS 0x10
-#define MCR_RTS 0x02
-#define LSR_DR 0x01
-#define LSR_BOTH_EMPTY 0x60
-
-static struct e5_struct {
- u8 packet[8];
- int pos;
- int length;
-
- u8 cached_mcr;
- u8 last_msr;
-} ec3104_keyb;
-
-/* Simple translation table for the SysRq keys */
-
-
-#ifdef CONFIG_MAGIC_SYSRQ
-unsigned char ec3104_kbd_sysrq_xlate[128] =
- "\000\0331234567890-=\177\t" /* 0x00 - 0x0f */
- "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */
- "dfghjkl;'`\000\\zxcv" /* 0x20 - 0x2f */
- "bnm,./\000*\000 \000\201\202\203\204\205" /* 0x30 - 0x3f */
- "\206\207\210\211\212\000\000789-456+1" /* 0x40 - 0x4f */
- "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
- "\r\000/"; /* 0x60 - 0x6f */
-#endif
-
-static void kbd_write_command_w(int data);
-static void kbd_write_output_w(int data);
-#ifdef CONFIG_PSMOUSE
-static void aux_write_ack(int val);
-static void __aux_write_ack(int val);
-#endif
-
-static DEFINE_SPINLOCK(kbd_controller_lock);
-static unsigned char handle_kbd_event(void);
-
-/* used only by send_data - set by keyboard_interrupt */
-static volatile unsigned char reply_expected;
-static volatile unsigned char acknowledge;
-static volatile unsigned char resend;
-
-
-int ec3104_kbd_setkeycode(unsigned int scancode, unsigned int keycode)
-{
- return 0;
-}
-
-int ec3104_kbd_getkeycode(unsigned int scancode)
-{
- return 0;
-}
-
-
-/* yes, it probably would be faster to use an array. I don't care. */
-
-static inline unsigned char ec3104_scan2key(unsigned char scancode)
-{
- switch (scancode) {
- case 1: /* '`' */
- return 41;
-
- case 2 ... 27:
- return scancode;
-
- case 28: /* '\\' */
- return 43;
-
- case 29 ... 39:
- return scancode + 1;
-
- case 40: /* '\r' */
- return 28;
-
- case 41 ... 50:
- return scancode + 3;
-
- case 51: /* ' ' */
- return 57;
-
- case 52: /* escape */
- return 1;
-
- case 54: /* insert/delete (labelled delete) */
- /* this should arguably be 110, but I'd like to have ctrl-alt-del
- * working with a standard keymap */
- return 111;
-
- case 55: /* left */
- return 105;
- case 56: /* home */
- return 102;
- case 57: /* end */
- return 107;
- case 58: /* up */
- return 103;
- case 59: /* down */
- return 108;
- case 60: /* pgup */
- return 104;
- case 61: /* pgdown */
- return 109;
- case 62: /* right */
- return 106;
-
- case 79 ... 88: /* f1 - f10 */
- return scancode - 20;
-
- case 89 ... 90: /* f11 - f12 */
- return scancode - 2;
-
- case 91: /* left shift */
- return 42;
-
- case 92: /* right shift */
- return 54;
-
- case 93: /* left alt */
- return 56;
- case 94: /* right alt */
- return 100;
- case 95: /* left ctrl */
- return 29;
- case 96: /* right ctrl */
- return 97;
-
- case 97: /* caps lock */
- return 58;
- case 102: /* left windows */
- return 125;
- case 103: /* right windows */
- return 126;
-
- case 106: /* Fn */
- /* this is wrong. */
- return 84;
-
- default:
- return 0;
- }
-}
-
-int ec3104_kbd_translate(unsigned char scancode, unsigned char *keycode,
- char raw_mode)
-{
- scancode &= 0x7f;
-
- *keycode = ec3104_scan2key(scancode);
-
- return 1;
-}
-
-char ec3104_kbd_unexpected_up(unsigned char keycode)
-{
- return 0200;
-}
-
-static inline void handle_keyboard_event(unsigned char scancode)
-{
-#ifdef CONFIG_VT
- handle_scancode(scancode, !(scancode & 0x80));
-#endif
- tasklet_schedule(&keyboard_tasklet);
-}
-
-void ec3104_kbd_leds(unsigned char leds)
-{
-}
-
-static u8 e5_checksum(u8 *packet, int count)
-{
- int i;
- u8 sum = 0;
-
- for (i=0; i<count; i++)
- sum ^= packet[i];
-
- if (sum & 0x80)
- sum ^= 0xc0;
-
- return sum;
-}
-
-static void e5_wait_for_cts(struct e5_struct *k)
-{
- u8 msr;
-
- do {
- msr = ctrl_inb(EC3104_SER4_MSR);
- } while (!(msr & MSR_CTS));
-}
-
-
-static void e5_send_byte(u8 byte, struct e5_struct *k)
-{
- u8 status;
-
- do {
- status = ctrl_inb(EC3104_SER4_LSR);
- } while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY);
-
- printk("<%02x>", byte);
-
- ctrl_outb(byte, EC3104_SER4_DATA);
-
- do {
- status = ctrl_inb(EC3104_SER4_LSR);
- } while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY);
-
-}
-
-static int e5_send_packet(u8 *packet, int count, struct e5_struct *k)
-{
- int i;
-
- disable_irq(EC3104_IRQ_SER4);
-
- if (k->cached_mcr & MCR_RTS) {
- printk("e5_send_packet: too slow\n");
- enable_irq(EC3104_IRQ_SER4);
- return -EAGAIN;
- }
-
- k->cached_mcr |= MCR_RTS;
- ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-
- e5_wait_for_cts(k);
-
- printk("p: ");
-
- for(i=0; i<count; i++)
- e5_send_byte(packet[i], k);
-
- e5_send_byte(e5_checksum(packet, count), k);
-
- printk("\n");
-
- udelay(1500);
-
- k->cached_mcr &= ~MCR_RTS;
- ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
-
- set_current_state(TASK_UNINTERRUPTIBLE);
-
-
-
- enable_irq(EC3104_IRQ_SER4);
-
-
-
- return 0;
-}
-
-/*
- * E5 packets we know about:
- * E5->host 0x80 0x05 <checksum> - resend packet
- * host->E5 0x83 0x43 <contrast> - set LCD contrast
- * host->E5 0x85 0x41 0x02 <brightness> 0x02 - set LCD backlight
- * E5->host 0x87 <ps2 packet> 0x00 <checksum> - external PS2
- * E5->host 0x88 <scancode> <checksum> - key press
- */
-
-static void e5_receive(struct e5_struct *k)
-{
- k->packet[k->pos++] = ctrl_inb(EC3104_SER4_DATA);
-
- if (k->pos == 1) {
- switch(k->packet[0]) {
- case 0x80:
- k->length = 3;
- break;
-
- case 0x87: /* PS2 ext */
- k->length = 6;
- break;
-
- case 0x88: /* keyboard */
- k->length = 3;
- break;
-
- default:
- k->length = 1;
- printk(KERN_WARNING "unknown E5 packet %02x\n",
- k->packet[0]);
- }
- }
-
- if (k->pos == k->length) {
- int i;
-
- if (e5_checksum(k->packet, k->length) != 0)
- printk(KERN_WARNING "E5: wrong checksum\n");
-
-#if 0
- printk("E5 packet [");
- for(i=0; i<k->length; i++) {
- printk("%02x ", k->packet[i]);
- }
-
- printk("(%02x)]\n", e5_checksum(k->packet, k->length-1));
-#endif
-
- switch(k->packet[0]) {
- case 0x80:
- case 0x88:
- handle_keyboard_event(k->packet[1]);
- break;
- }
-
- k->pos = k->length = 0;
- }
-}
-
-static void ec3104_keyb_interrupt(int irq, void *data)
-{
- struct e5_struct *k = &ec3104_keyb;
- u8 msr, lsr;
-
- msr = ctrl_inb(EC3104_SER4_MSR);
-
- if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) {
- if (k->cached_mcr & MCR_RTS)
- printk("confused: RTS already high\n");
- /* CTS went high. Send RTS. */
- k->cached_mcr |= MCR_RTS;
-
- ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
- } else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) {
- /* CTS went low. */
- if (!(k->cached_mcr & MCR_RTS))
- printk("confused: RTS already low\n");
-
- k->cached_mcr &= ~MCR_RTS;
-
- ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
- }
-
- k->last_msr = msr;
-
- lsr = ctrl_inb(EC3104_SER4_LSR);
-
- if (lsr & LSR_DR)
- e5_receive(k);
-}
-
-static void ec3104_keyb_clear_state(void)
-{
- struct e5_struct *k = &ec3104_keyb;
- u8 msr, lsr;
-
- /* we want CTS to be low */
- k->last_msr = 0;
-
- for (;;) {
- msleep(100);
-
- msr = ctrl_inb(EC3104_SER4_MSR);
-
- lsr = ctrl_inb(EC3104_SER4_LSR);
-
- if (lsr & LSR_DR) {
- e5_receive(k);
- continue;
- }
-
- if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) {
- if (k->cached_mcr & MCR_RTS)
- printk("confused: RTS already high\n");
- /* CTS went high. Send RTS. */
- k->cached_mcr |= MCR_RTS;
-
- ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
- } else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) {
- /* CTS went low. */
- if (!(k->cached_mcr & MCR_RTS))
- printk("confused: RTS already low\n");
-
- k->cached_mcr &= ~MCR_RTS;
-
- ctrl_outb(k->cached_mcr, EC3104_SER4_MCR);
- } else
- break;
-
- k->last_msr = msr;
-
- continue;
- }
-}
-
-void __init ec3104_kbd_init_hw(void)
-{
- ec3104_keyb.last_msr = ctrl_inb(EC3104_SER4_MSR);
- ec3104_keyb.cached_mcr = ctrl_inb(EC3104_SER4_MCR);
-
- ec3104_keyb_clear_state();
-
- /* Ok, finally allocate the IRQ, and off we go.. */
- request_irq(EC3104_IRQ_SER4, ec3104_keyb_interrupt, 0, "keyboard", NULL);
-}
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index e47f881..700a165 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -158,6 +158,7 @@
config SENSORS_AMS
tristate "Apple Motion Sensor driver"
depends on PPC_PMAC && !PPC64 && INPUT && ((ADB_PMU && I2C = y) || (ADB_PMU && !I2C) || I2C) && EXPERIMENTAL
+ select INPUT_POLLDEV
help
Support for the motion sensor included in PowerBooks. Includes
implementations for PMU and I2C.
@@ -701,6 +702,7 @@
config SENSORS_HDAPS
tristate "IBM Hard Drive Active Protection System (hdaps)"
depends on INPUT && X86
+ select INPUT_POLLDEV
default n
help
This driver provides support for the IBM Hard Drive Active Protection
@@ -722,6 +724,7 @@
depends on INPUT && X86
select NEW_LEDS
select LEDS_CLASS
+ select INPUT_POLLDEV
default n
help
This driver provides support for the Apple System Management
diff --git a/drivers/hwmon/ams/ams-input.c b/drivers/hwmon/ams/ams-input.c
index ca7095d..7b81e0c2 100644
--- a/drivers/hwmon/ams/ams-input.c
+++ b/drivers/hwmon/ams/ams-input.c
@@ -27,47 +27,32 @@
module_param(invert, bool, 0644);
MODULE_PARM_DESC(invert, "Invert input data on X and Y axis");
-static int ams_input_kthread(void *data)
+static void ams_idev_poll(struct input_polled_dev *dev)
{
+ struct input_dev *idev = dev->input;
s8 x, y, z;
- while (!kthread_should_stop()) {
- mutex_lock(&ams_info.lock);
+ mutex_lock(&ams_info.lock);
- ams_sensors(&x, &y, &z);
+ ams_sensors(&x, &y, &z);
- x -= ams_info.xcalib;
- y -= ams_info.ycalib;
- z -= ams_info.zcalib;
+ x -= ams_info.xcalib;
+ y -= ams_info.ycalib;
+ z -= ams_info.zcalib;
- input_report_abs(ams_info.idev, ABS_X, invert ? -x : x);
- input_report_abs(ams_info.idev, ABS_Y, invert ? -y : y);
- input_report_abs(ams_info.idev, ABS_Z, z);
+ input_report_abs(idev, ABS_X, invert ? -x : x);
+ input_report_abs(idev, ABS_Y, invert ? -y : y);
+ input_report_abs(idev, ABS_Z, z);
- input_sync(ams_info.idev);
+ input_sync(idev);
- mutex_unlock(&ams_info.lock);
-
- msleep(25);
- }
-
- return 0;
-}
-
-static int ams_input_open(struct input_dev *dev)
-{
- ams_info.kthread = kthread_run(ams_input_kthread, NULL, "kams");
- return IS_ERR(ams_info.kthread) ? PTR_ERR(ams_info.kthread) : 0;
-}
-
-static void ams_input_close(struct input_dev *dev)
-{
- kthread_stop(ams_info.kthread);
+ mutex_unlock(&ams_info.lock);
}
/* Call with ams_info.lock held! */
static void ams_input_enable(void)
{
+ struct input_dev *input;
s8 x, y, z;
if (ams_info.idev)
@@ -78,27 +63,29 @@
ams_info.ycalib = y;
ams_info.zcalib = z;
- ams_info.idev = input_allocate_device();
+ ams_info.idev = input_allocate_polled_device();
if (!ams_info.idev)
return;
- ams_info.idev->name = "Apple Motion Sensor";
- ams_info.idev->id.bustype = ams_info.bustype;
- ams_info.idev->id.vendor = 0;
- ams_info.idev->open = ams_input_open;
- ams_info.idev->close = ams_input_close;
- ams_info.idev->dev.parent = &ams_info.of_dev->dev;
+ ams_info.idev->poll = ams_idev_poll;
+ ams_info.idev->poll_interval = 25;
- input_set_abs_params(ams_info.idev, ABS_X, -50, 50, 3, 0);
- input_set_abs_params(ams_info.idev, ABS_Y, -50, 50, 3, 0);
- input_set_abs_params(ams_info.idev, ABS_Z, -50, 50, 3, 0);
+ input = ams_info.idev->input;
+ input->name = "Apple Motion Sensor";
+ input->id.bustype = ams_info.bustype;
+ input->id.vendor = 0;
+ input->dev.parent = &ams_info.of_dev->dev;
- set_bit(EV_ABS, ams_info.idev->evbit);
- set_bit(EV_KEY, ams_info.idev->evbit);
- set_bit(BTN_TOUCH, ams_info.idev->keybit);
+ input_set_abs_params(input, ABS_X, -50, 50, 3, 0);
+ input_set_abs_params(input, ABS_Y, -50, 50, 3, 0);
+ input_set_abs_params(input, ABS_Z, -50, 50, 3, 0);
- if (input_register_device(ams_info.idev)) {
- input_free_device(ams_info.idev);
+ set_bit(EV_ABS, input->evbit);
+ set_bit(EV_KEY, input->evbit);
+ set_bit(BTN_TOUCH, input->keybit);
+
+ if (input_register_polled_device(ams_info.idev)) {
+ input_free_polled_device(ams_info.idev);
ams_info.idev = NULL;
return;
}
@@ -108,7 +95,8 @@
static void ams_input_disable(void)
{
if (ams_info.idev) {
- input_unregister_device(ams_info.idev);
+ input_unregister_polled_device(ams_info.idev);
+ input_free_polled_device(ams_info.idev);
ams_info.idev = NULL;
}
}
diff --git a/drivers/hwmon/ams/ams.h b/drivers/hwmon/ams/ams.h
index 240730e..a6221e5 100644
--- a/drivers/hwmon/ams/ams.h
+++ b/drivers/hwmon/ams/ams.h
@@ -1,5 +1,5 @@
#include <linux/i2c.h>
-#include <linux/input.h>
+#include <linux/input-polldev.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
@@ -52,8 +52,7 @@
#endif
/* Joystick emulation */
- struct task_struct *kthread;
- struct input_dev *idev;
+ struct input_polled_dev *idev;
__u16 bustype;
/* calibrated null values */
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index f37fd7e..4879125 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -28,7 +28,7 @@
#include <linux/delay.h>
#include <linux/platform_device.h>
-#include <linux/input.h>
+#include <linux/input-polldev.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/timer.h>
@@ -59,9 +59,9 @@
#define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6 bytes) */
#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6 bytes) */
-#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */
+#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */
-#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */
+#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */
#define MOTION_SENSOR_X_KEY "MO_X" /* r-o sp78 (2 bytes) */
#define MOTION_SENSOR_Y_KEY "MO_Y" /* r-o sp78 (2 bytes) */
@@ -103,7 +103,7 @@
#define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */
#define INIT_WAIT_MSECS 50 /* ... in 50ms increments */
-#define APPLESMC_POLL_PERIOD (HZ/20) /* poll for input every 1/20s */
+#define APPLESMC_POLL_INTERVAL 50 /* msecs */
#define APPLESMC_INPUT_FUZZ 4 /* input event threshold */
#define APPLESMC_INPUT_FLAT 4
@@ -125,9 +125,8 @@
static struct platform_device *pdev;
static s16 rest_x;
static s16 rest_y;
-static struct timer_list applesmc_timer;
-static struct input_dev *applesmc_idev;
static struct device *hwmon_dev;
+static struct input_polled_dev *applesmc_idev;
/* Indicates whether this computer has an accelerometer. */
static unsigned int applesmc_accelerometer;
@@ -138,7 +137,7 @@
/* Indicates which temperature sensors set to use. */
static unsigned int applesmc_temperature_set;
-static struct mutex applesmc_lock;
+static DEFINE_MUTEX(applesmc_lock);
/*
* Last index written to key_at_index sysfs file, and value to use for all other
@@ -455,27 +454,12 @@
rest_x = -rest_x;
}
-static int applesmc_idev_open(struct input_dev *dev)
+static void applesmc_idev_poll(struct input_polled_dev *dev)
{
- add_timer(&applesmc_timer);
-
- return 0;
-}
-
-static void applesmc_idev_close(struct input_dev *dev)
-{
- del_timer_sync(&applesmc_timer);
-}
-
-static void applesmc_idev_poll(unsigned long unused)
-{
+ struct input_dev *idev = dev->input;
s16 x, y;
- /* Cannot sleep. Try nonblockingly. If we fail, try again later. */
- if (!mutex_trylock(&applesmc_lock)) {
- mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD);
- return;
- }
+ mutex_lock(&applesmc_lock);
if (applesmc_read_motion_sensor(SENSOR_X, &x))
goto out;
@@ -483,13 +467,11 @@
goto out;
x = -x;
- input_report_abs(applesmc_idev, ABS_X, x - rest_x);
- input_report_abs(applesmc_idev, ABS_Y, y - rest_y);
- input_sync(applesmc_idev);
+ input_report_abs(idev, ABS_X, x - rest_x);
+ input_report_abs(idev, ABS_Y, y - rest_y);
+ input_sync(idev);
out:
- mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD);
-
mutex_unlock(&applesmc_lock);
}
@@ -821,8 +803,7 @@
if (!ret) {
return info[0];
- }
- else {
+ } else {
return ret;
}
}
@@ -1093,6 +1074,7 @@
/* Create accelerometer ressources */
static int applesmc_create_accelerometer(void)
{
+ struct input_dev *idev;
int ret;
ret = sysfs_create_group(&pdev->dev.kobj,
@@ -1100,40 +1082,37 @@
if (ret)
goto out;
- applesmc_idev = input_allocate_device();
+ applesmc_idev = input_allocate_polled_device();
if (!applesmc_idev) {
ret = -ENOMEM;
goto out_sysfs;
}
+ applesmc_idev->poll = applesmc_idev_poll;
+ applesmc_idev->poll_interval = APPLESMC_POLL_INTERVAL;
+
/* initial calibrate for the input device */
applesmc_calibrate();
- /* initialize the input class */
- applesmc_idev->name = "applesmc";
- applesmc_idev->id.bustype = BUS_HOST;
- applesmc_idev->dev.parent = &pdev->dev;
- applesmc_idev->evbit[0] = BIT(EV_ABS);
- applesmc_idev->open = applesmc_idev_open;
- applesmc_idev->close = applesmc_idev_close;
- input_set_abs_params(applesmc_idev, ABS_X,
+ /* initialize the input device */
+ idev = applesmc_idev->input;
+ idev->name = "applesmc";
+ idev->id.bustype = BUS_HOST;
+ idev->dev.parent = &pdev->dev;
+ idev->evbit[0] = BIT(EV_ABS);
+ input_set_abs_params(idev, ABS_X,
-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
- input_set_abs_params(applesmc_idev, ABS_Y,
+ input_set_abs_params(idev, ABS_Y,
-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
- ret = input_register_device(applesmc_idev);
+ ret = input_register_polled_device(applesmc_idev);
if (ret)
goto out_idev;
- /* start up our timer for the input device */
- init_timer(&applesmc_timer);
- applesmc_timer.function = applesmc_idev_poll;
- applesmc_timer.expires = jiffies + APPLESMC_POLL_PERIOD;
-
return 0;
out_idev:
- input_free_device(applesmc_idev);
+ input_free_polled_device(applesmc_idev);
out_sysfs:
sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group);
@@ -1146,8 +1125,8 @@
/* Release all ressources used by the accelerometer */
static void applesmc_release_accelerometer(void)
{
- del_timer_sync(&applesmc_timer);
- input_unregister_device(applesmc_idev);
+ input_unregister_polled_device(applesmc_idev);
+ input_free_polled_device(applesmc_idev);
sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group);
}
@@ -1184,8 +1163,6 @@
int count;
int i;
- mutex_init(&applesmc_lock);
-
if (!dmi_check_system(applesmc_whitelist)) {
printk(KERN_WARNING "applesmc: supported laptop not found!\n");
ret = -ENODEV;
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index a7c6d40..8a7ae03 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -28,7 +28,7 @@
#include <linux/delay.h>
#include <linux/platform_device.h>
-#include <linux/input.h>
+#include <linux/input-polldev.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/module.h>
@@ -61,13 +61,12 @@
#define INIT_TIMEOUT_MSECS 4000 /* wait up to 4s for device init ... */
#define INIT_WAIT_MSECS 200 /* ... in 200ms increments */
-#define HDAPS_POLL_PERIOD (HZ/20) /* poll for input every 1/20s */
+#define HDAPS_POLL_INTERVAL 50 /* poll for input every 1/20s (50 ms)*/
#define HDAPS_INPUT_FUZZ 4 /* input event threshold */
#define HDAPS_INPUT_FLAT 4
-static struct timer_list hdaps_timer;
static struct platform_device *pdev;
-static struct input_dev *hdaps_idev;
+static struct input_polled_dev *hdaps_idev;
static unsigned int hdaps_invert;
static u8 km_activity;
static int rest_x;
@@ -323,24 +322,19 @@
__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y);
}
-static void hdaps_mousedev_poll(unsigned long unused)
+static void hdaps_mousedev_poll(struct input_polled_dev *dev)
{
+ struct input_dev *input_dev = dev->input;
int x, y;
- /* Cannot sleep. Try nonblockingly. If we fail, try again later. */
- if (mutex_trylock(&hdaps_mtx)) {
- mod_timer(&hdaps_timer,jiffies + HDAPS_POLL_PERIOD);
- return;
- }
+ mutex_lock(&hdaps_mtx);
if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y))
goto out;
- input_report_abs(hdaps_idev, ABS_X, x - rest_x);
- input_report_abs(hdaps_idev, ABS_Y, y - rest_y);
- input_sync(hdaps_idev);
-
- mod_timer(&hdaps_timer, jiffies + HDAPS_POLL_PERIOD);
+ input_report_abs(input_dev, ABS_X, x - rest_x);
+ input_report_abs(input_dev, ABS_Y, y - rest_y);
+ input_sync(input_dev);
out:
mutex_unlock(&hdaps_mtx);
@@ -536,6 +530,7 @@
static int __init hdaps_init(void)
{
+ struct input_dev *idev;
int ret;
if (!dmi_check_system(hdaps_whitelist)) {
@@ -563,39 +558,37 @@
if (ret)
goto out_device;
- hdaps_idev = input_allocate_device();
+ hdaps_idev = input_allocate_polled_device();
if (!hdaps_idev) {
ret = -ENOMEM;
goto out_group;
}
+ hdaps_idev->poll = hdaps_mousedev_poll;
+ hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL;
+
/* initial calibrate for the input device */
hdaps_calibrate();
/* initialize the input class */
- hdaps_idev->name = "hdaps";
- hdaps_idev->dev.parent = &pdev->dev;
- hdaps_idev->evbit[0] = BIT(EV_ABS);
- input_set_abs_params(hdaps_idev, ABS_X,
+ idev = hdaps_idev->input;
+ idev->name = "hdaps";
+ idev->dev.parent = &pdev->dev;
+ idev->evbit[0] = BIT(EV_ABS);
+ input_set_abs_params(idev, ABS_X,
-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
- input_set_abs_params(hdaps_idev, ABS_Y,
+ input_set_abs_params(idev, ABS_Y,
-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
- ret = input_register_device(hdaps_idev);
+ ret = input_register_polled_device(hdaps_idev);
if (ret)
goto out_idev;
- /* start up our timer for the input device */
- init_timer(&hdaps_timer);
- hdaps_timer.function = hdaps_mousedev_poll;
- hdaps_timer.expires = jiffies + HDAPS_POLL_PERIOD;
- add_timer(&hdaps_timer);
-
printk(KERN_INFO "hdaps: driver successfully loaded.\n");
return 0;
out_idev:
- input_free_device(hdaps_idev);
+ input_free_polled_device(hdaps_idev);
out_group:
sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
out_device:
@@ -611,8 +604,8 @@
static void __exit hdaps_exit(void)
{
- del_timer_sync(&hdaps_timer);
- input_unregister_device(hdaps_idev);
+ input_unregister_polled_device(hdaps_idev);
+ input_free_polled_device(hdaps_idev);
sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
platform_device_unregister(pdev);
platform_driver_unregister(&hdaps_driver);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 6545fa7..1b3327a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -349,6 +349,7 @@
struct sk_buff_head queue;
struct neighbour *neighbour;
+ struct net_device *dev;
struct list_head list;
};
@@ -365,7 +366,8 @@
INFINIBAND_ALEN, sizeof(void *));
}
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
+ struct net_device *dev);
void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
extern struct workqueue_struct *ipoib_workqueue;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e072f3c..362610d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -517,7 +517,7 @@
struct ipoib_path *path;
struct ipoib_neigh *neigh;
- neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+ neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
if (!neigh) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -692,9 +692,10 @@
goto out;
}
} else if (neigh->ah) {
- if (unlikely(memcmp(&neigh->dgid.raw,
+ if (unlikely((memcmp(&neigh->dgid.raw,
skb->dst->neighbour->ha + 4,
- sizeof(union ib_gid)))) {
+ sizeof(union ib_gid))) ||
+ (neigh->dev != dev))) {
spin_lock(&priv->lock);
/*
* It's safe to call ipoib_put_ah() inside
@@ -817,6 +818,13 @@
unsigned long flags;
struct ipoib_ah *ah = NULL;
+ neigh = *to_ipoib_neigh(n);
+ if (neigh) {
+ priv = netdev_priv(neigh->dev);
+ ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n",
+ n->dev->name);
+ } else
+ return;
ipoib_dbg(priv,
"neigh_cleanup for %06x " IPOIB_GID_FMT "\n",
IPOIB_QPN(n->ha),
@@ -824,13 +832,10 @@
spin_lock_irqsave(&priv->lock, flags);
- neigh = *to_ipoib_neigh(n);
- if (neigh) {
- if (neigh->ah)
- ah = neigh->ah;
- list_del(&neigh->list);
- ipoib_neigh_free(n->dev, neigh);
- }
+ if (neigh->ah)
+ ah = neigh->ah;
+ list_del(&neigh->list);
+ ipoib_neigh_free(n->dev, neigh);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -838,7 +843,8 @@
ipoib_put_ah(ah);
}
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+ struct net_device *dev)
{
struct ipoib_neigh *neigh;
@@ -847,6 +853,7 @@
return NULL;
neigh->neighbour = neighbour;
+ neigh->dev = dev;
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
ipoib_cm_set(neigh, NULL);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 827820e..9bcfc7a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -705,7 +705,8 @@
if (skb->dst &&
skb->dst->neighbour &&
!*to_ipoib_neigh(skb->dst->neighbour)) {
- struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+ struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour,
+ skb->dev);
if (neigh) {
kref_get(&mcast->ah->ref);
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 2d87357..63512d9 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -114,28 +114,6 @@
To compile this driver as a module, choose M here: the
module will be called joydev.
-config INPUT_TSDEV
- tristate "Touchscreen interface"
- ---help---
- Say Y here if you have an application that only can understand the
- Compaq touchscreen protocol for absolute pointer data. This is
- useful namely for embedded configurations.
-
- If unsure, say N.
-
- To compile this driver as a module, choose M here: the
- module will be called tsdev.
-
-config INPUT_TSDEV_SCREEN_X
- int "Horizontal screen resolution"
- depends on INPUT_TSDEV
- default "240"
-
-config INPUT_TSDEV_SCREEN_Y
- int "Vertical screen resolution"
- depends on INPUT_TSDEV
- default "320"
-
config INPUT_EVDEV
tristate "Event interface"
help
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 15eb752..99af903 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -13,7 +13,6 @@
obj-$(CONFIG_INPUT_MOUSEDEV) += mousedev.o
obj-$(CONFIG_INPUT_JOYDEV) += joydev.o
obj-$(CONFIG_INPUT_EVDEV) += evdev.o
-obj-$(CONFIG_INPUT_TSDEV) += tsdev.o
obj-$(CONFIG_INPUT_EVBUG) += evbug.o
obj-$(CONFIG_INPUT_KEYBOARD) += keyboard/
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index f1c3d6c..1d62c8b 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -30,6 +30,8 @@
wait_queue_head_t wait;
struct evdev_client *grab;
struct list_head client_list;
+ spinlock_t client_lock; /* protects client_list */
+ struct mutex mutex;
struct device dev;
};
@@ -37,39 +39,54 @@
struct input_event buffer[EVDEV_BUFFER_SIZE];
int head;
int tail;
+ spinlock_t buffer_lock; /* protects access to buffer, head and tail */
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
};
static struct evdev *evdev_table[EVDEV_MINORS];
+static DEFINE_MUTEX(evdev_table_mutex);
-static void evdev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+static void evdev_pass_event(struct evdev_client *client,
+ struct input_event *event)
+{
+ /*
+ * Interrupts are disabled, just acquire the lock
+ */
+ spin_lock(&client->buffer_lock);
+ client->buffer[client->head++] = *event;
+ client->head &= EVDEV_BUFFER_SIZE - 1;
+ spin_unlock(&client->buffer_lock);
+
+ kill_fasync(&client->fasync, SIGIO, POLL_IN);
+}
+
+/*
+ * Pass incoming event to all connected clients.
+ */
+static void evdev_event(struct input_handle *handle,
+ unsigned int type, unsigned int code, int value)
{
struct evdev *evdev = handle->private;
struct evdev_client *client;
+ struct input_event event;
- if (evdev->grab) {
- client = evdev->grab;
+ do_gettimeofday(&event.time);
+ event.type = type;
+ event.code = code;
+ event.value = value;
- do_gettimeofday(&client->buffer[client->head].time);
- client->buffer[client->head].type = type;
- client->buffer[client->head].code = code;
- client->buffer[client->head].value = value;
- client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1);
+ rcu_read_lock();
- kill_fasync(&client->fasync, SIGIO, POLL_IN);
- } else
- list_for_each_entry(client, &evdev->client_list, node) {
+ client = rcu_dereference(evdev->grab);
+ if (client)
+ evdev_pass_event(client, &event);
+ else
+ list_for_each_entry_rcu(client, &evdev->client_list, node)
+ evdev_pass_event(client, &event);
- do_gettimeofday(&client->buffer[client->head].time);
- client->buffer[client->head].type = type;
- client->buffer[client->head].code = code;
- client->buffer[client->head].value = value;
- client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1);
-
- kill_fasync(&client->fasync, SIGIO, POLL_IN);
- }
+ rcu_read_unlock();
wake_up_interruptible(&evdev->wait);
}
@@ -88,38 +105,140 @@
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
+ int retval;
+
+ retval = mutex_lock_interruptible(&evdev->mutex);
+ if (retval)
+ return retval;
if (!evdev->exist)
- return -ENODEV;
+ retval = -ENODEV;
+ else
+ retval = input_flush_device(&evdev->handle, file);
- return input_flush_device(&evdev->handle, file);
+ mutex_unlock(&evdev->mutex);
+ return retval;
}
static void evdev_free(struct device *dev)
{
struct evdev *evdev = container_of(dev, struct evdev, dev);
- evdev_table[evdev->minor] = NULL;
kfree(evdev);
}
+/*
+ * Grabs an event device (along with underlying input device).
+ * This function is called with evdev->mutex taken.
+ */
+static int evdev_grab(struct evdev *evdev, struct evdev_client *client)
+{
+ int error;
+
+ if (evdev->grab)
+ return -EBUSY;
+
+ error = input_grab_device(&evdev->handle);
+ if (error)
+ return error;
+
+ rcu_assign_pointer(evdev->grab, client);
+ synchronize_rcu();
+
+ return 0;
+}
+
+static int evdev_ungrab(struct evdev *evdev, struct evdev_client *client)
+{
+ if (evdev->grab != client)
+ return -EINVAL;
+
+ rcu_assign_pointer(evdev->grab, NULL);
+ synchronize_rcu();
+ input_release_device(&evdev->handle);
+
+ return 0;
+}
+
+static void evdev_attach_client(struct evdev *evdev,
+ struct evdev_client *client)
+{
+ spin_lock(&evdev->client_lock);
+ list_add_tail_rcu(&client->node, &evdev->client_list);
+ spin_unlock(&evdev->client_lock);
+ synchronize_rcu();
+}
+
+static void evdev_detach_client(struct evdev *evdev,
+ struct evdev_client *client)
+{
+ spin_lock(&evdev->client_lock);
+ list_del_rcu(&client->node);
+ spin_unlock(&evdev->client_lock);
+ synchronize_rcu();
+}
+
+static int evdev_open_device(struct evdev *evdev)
+{
+ int retval;
+
+ retval = mutex_lock_interruptible(&evdev->mutex);
+ if (retval)
+ return retval;
+
+ if (!evdev->exist)
+ retval = -ENODEV;
+ else if (!evdev->open++) {
+ retval = input_open_device(&evdev->handle);
+ if (retval)
+ evdev->open--;
+ }
+
+ mutex_unlock(&evdev->mutex);
+ return retval;
+}
+
+static void evdev_close_device(struct evdev *evdev)
+{
+ mutex_lock(&evdev->mutex);
+
+ if (evdev->exist && !--evdev->open)
+ input_close_device(&evdev->handle);
+
+ mutex_unlock(&evdev->mutex);
+}
+
+/*
+ * Wake up users waiting for IO so they can disconnect from
+ * dead device.
+ */
+static void evdev_hangup(struct evdev *evdev)
+{
+ struct evdev_client *client;
+
+ spin_lock(&evdev->client_lock);
+ list_for_each_entry(client, &evdev->client_list, node)
+ kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+ spin_unlock(&evdev->client_lock);
+
+ wake_up_interruptible(&evdev->wait);
+}
+
static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- if (evdev->grab == client) {
- input_release_device(&evdev->handle);
- evdev->grab = NULL;
- }
+ mutex_lock(&evdev->mutex);
+ if (evdev->grab == client)
+ evdev_ungrab(evdev, client);
+ mutex_unlock(&evdev->mutex);
evdev_fasync(-1, file, 0);
- list_del(&client->node);
+ evdev_detach_client(evdev, client);
kfree(client);
- if (!--evdev->open && evdev->exist)
- input_close_device(&evdev->handle);
-
+ evdev_close_device(evdev);
put_device(&evdev->dev);
return 0;
@@ -127,41 +246,44 @@
static int evdev_open(struct inode *inode, struct file *file)
{
- struct evdev_client *client;
struct evdev *evdev;
+ struct evdev_client *client;
int i = iminor(inode) - EVDEV_MINOR_BASE;
int error;
if (i >= EVDEV_MINORS)
return -ENODEV;
+ error = mutex_lock_interruptible(&evdev_table_mutex);
+ if (error)
+ return error;
evdev = evdev_table[i];
+ if (evdev)
+ get_device(&evdev->dev);
+ mutex_unlock(&evdev_table_mutex);
- if (!evdev || !evdev->exist)
+ if (!evdev)
return -ENODEV;
- get_device(&evdev->dev);
-
client = kzalloc(sizeof(struct evdev_client), GFP_KERNEL);
if (!client) {
error = -ENOMEM;
goto err_put_evdev;
}
+ spin_lock_init(&client->buffer_lock);
client->evdev = evdev;
- list_add_tail(&client->node, &evdev->client_list);
+ evdev_attach_client(evdev, client);
- if (!evdev->open++ && evdev->exist) {
- error = input_open_device(&evdev->handle);
- if (error)
- goto err_free_client;
- }
+ error = evdev_open_device(evdev);
+ if (error)
+ goto err_free_client;
file->private_data = client;
return 0;
err_free_client:
- list_del(&client->node);
+ evdev_detach_client(evdev, client);
kfree(client);
err_put_evdev:
put_device(&evdev->dev);
@@ -197,12 +319,14 @@
sizeof(struct input_event_compat) : sizeof(struct input_event);
}
-static int evdev_event_from_user(const char __user *buffer, struct input_event *event)
+static int evdev_event_from_user(const char __user *buffer,
+ struct input_event *event)
{
if (COMPAT_TEST) {
struct input_event_compat compat_event;
- if (copy_from_user(&compat_event, buffer, sizeof(struct input_event_compat)))
+ if (copy_from_user(&compat_event, buffer,
+ sizeof(struct input_event_compat)))
return -EFAULT;
event->time.tv_sec = compat_event.time.tv_sec;
@@ -219,7 +343,8 @@
return 0;
}
-static int evdev_event_to_user(char __user *buffer, const struct input_event *event)
+static int evdev_event_to_user(char __user *buffer,
+ const struct input_event *event)
{
if (COMPAT_TEST) {
struct input_event_compat compat_event;
@@ -230,7 +355,8 @@
compat_event.code = event->code;
compat_event.value = event->value;
- if (copy_to_user(buffer, &compat_event, sizeof(struct input_event_compat)))
+ if (copy_to_user(buffer, &compat_event,
+ sizeof(struct input_event_compat)))
return -EFAULT;
} else {
@@ -248,7 +374,8 @@
return sizeof(struct input_event);
}
-static int evdev_event_from_user(const char __user *buffer, struct input_event *event)
+static int evdev_event_from_user(const char __user *buffer,
+ struct input_event *event)
{
if (copy_from_user(event, buffer, sizeof(struct input_event)))
return -EFAULT;
@@ -256,7 +383,8 @@
return 0;
}
-static int evdev_event_to_user(char __user *buffer, const struct input_event *event)
+static int evdev_event_to_user(char __user *buffer,
+ const struct input_event *event)
{
if (copy_to_user(buffer, event, sizeof(struct input_event)))
return -EFAULT;
@@ -266,37 +394,71 @@
#endif /* CONFIG_COMPAT */
-static ssize_t evdev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+static ssize_t evdev_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
struct input_event event;
- int retval = 0;
+ int retval;
- if (!evdev->exist)
- return -ENODEV;
+ retval = mutex_lock_interruptible(&evdev->mutex);
+ if (retval)
+ return retval;
+
+ if (!evdev->exist) {
+ retval = -ENODEV;
+ goto out;
+ }
while (retval < count) {
- if (evdev_event_from_user(buffer + retval, &event))
- return -EFAULT;
- input_inject_event(&evdev->handle, event.type, event.code, event.value);
+ if (evdev_event_from_user(buffer + retval, &event)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ input_inject_event(&evdev->handle,
+ event.type, event.code, event.value);
retval += evdev_event_size();
}
+ out:
+ mutex_unlock(&evdev->mutex);
return retval;
}
-static ssize_t evdev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+static int evdev_fetch_next_event(struct evdev_client *client,
+ struct input_event *event)
+{
+ int have_event;
+
+ spin_lock_irq(&client->buffer_lock);
+
+ have_event = client->head != client->tail;
+ if (have_event) {
+ *event = client->buffer[client->tail++];
+ client->tail &= EVDEV_BUFFER_SIZE - 1;
+ }
+
+ spin_unlock_irq(&client->buffer_lock);
+
+ return have_event;
+}
+
+static ssize_t evdev_read(struct file *file, char __user *buffer,
+ size_t count, loff_t *ppos)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
+ struct input_event event;
int retval;
if (count < evdev_event_size())
return -EINVAL;
- if (client->head == client->tail && evdev->exist && (file->f_flags & O_NONBLOCK))
+ if (client->head == client->tail && evdev->exist &&
+ (file->f_flags & O_NONBLOCK))
return -EAGAIN;
retval = wait_event_interruptible(evdev->wait,
@@ -307,14 +469,12 @@
if (!evdev->exist)
return -ENODEV;
- while (client->head != client->tail && retval + evdev_event_size() <= count) {
+ while (retval + evdev_event_size() <= count &&
+ evdev_fetch_next_event(client, &event)) {
- struct input_event *event = (struct input_event *) client->buffer + client->tail;
-
- if (evdev_event_to_user(buffer + retval, event))
+ if (evdev_event_to_user(buffer + retval, &event))
return -EFAULT;
- client->tail = (client->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
retval += evdev_event_size();
}
@@ -409,8 +569,8 @@
return copy_to_user(p, str, len) ? -EFAULT : len;
}
-static long evdev_ioctl_handler(struct file *file, unsigned int cmd,
- void __user *p, int compat_mode)
+static long evdev_do_ioctl(struct file *file, unsigned int cmd,
+ void __user *p, int compat_mode)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
@@ -421,215 +581,289 @@
int i, t, u, v;
int error;
- if (!evdev->exist)
- return -ENODEV;
-
switch (cmd) {
- case EVIOCGVERSION:
- return put_user(EV_VERSION, ip);
+ case EVIOCGVERSION:
+ return put_user(EV_VERSION, ip);
- case EVIOCGID:
- if (copy_to_user(p, &dev->id, sizeof(struct input_id)))
- return -EFAULT;
- return 0;
+ case EVIOCGID:
+ if (copy_to_user(p, &dev->id, sizeof(struct input_id)))
+ return -EFAULT;
+ return 0;
- case EVIOCGREP:
- if (!test_bit(EV_REP, dev->evbit))
- return -ENOSYS;
- if (put_user(dev->rep[REP_DELAY], ip))
- return -EFAULT;
- if (put_user(dev->rep[REP_PERIOD], ip + 1))
- return -EFAULT;
- return 0;
+ case EVIOCGREP:
+ if (!test_bit(EV_REP, dev->evbit))
+ return -ENOSYS;
+ if (put_user(dev->rep[REP_DELAY], ip))
+ return -EFAULT;
+ if (put_user(dev->rep[REP_PERIOD], ip + 1))
+ return -EFAULT;
+ return 0;
- case EVIOCSREP:
- if (!test_bit(EV_REP, dev->evbit))
- return -ENOSYS;
- if (get_user(u, ip))
- return -EFAULT;
- if (get_user(v, ip + 1))
- return -EFAULT;
+ case EVIOCSREP:
+ if (!test_bit(EV_REP, dev->evbit))
+ return -ENOSYS;
+ if (get_user(u, ip))
+ return -EFAULT;
+ if (get_user(v, ip + 1))
+ return -EFAULT;
- input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u);
- input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v);
+ input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u);
+ input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v);
- return 0;
+ return 0;
- case EVIOCGKEYCODE:
- if (get_user(t, ip))
- return -EFAULT;
+ case EVIOCGKEYCODE:
+ if (get_user(t, ip))
+ return -EFAULT;
- error = dev->getkeycode(dev, t, &v);
- if (error)
- return error;
-
- if (put_user(v, ip + 1))
- return -EFAULT;
-
- return 0;
-
- case EVIOCSKEYCODE:
- if (get_user(t, ip) || get_user(v, ip + 1))
- return -EFAULT;
-
- return dev->setkeycode(dev, t, v);
-
- case EVIOCSFF:
- if (copy_from_user(&effect, p, sizeof(effect)))
- return -EFAULT;
-
- error = input_ff_upload(dev, &effect, file);
-
- if (put_user(effect.id, &(((struct ff_effect __user *)p)->id)))
- return -EFAULT;
-
+ error = dev->getkeycode(dev, t, &v);
+ if (error)
return error;
- case EVIOCRMFF:
- return input_ff_erase(dev, (int)(unsigned long) p, file);
+ if (put_user(v, ip + 1))
+ return -EFAULT;
- case EVIOCGEFFECTS:
- i = test_bit(EV_FF, dev->evbit) ? dev->ff->max_effects : 0;
- if (put_user(i, ip))
- return -EFAULT;
- return 0;
+ return 0;
- case EVIOCGRAB:
- if (p) {
- if (evdev->grab)
- return -EBUSY;
- if (input_grab_device(&evdev->handle))
- return -EBUSY;
- evdev->grab = client;
- return 0;
- } else {
- if (evdev->grab != client)
- return -EINVAL;
- input_release_device(&evdev->handle);
- evdev->grab = NULL;
+ case EVIOCSKEYCODE:
+ if (get_user(t, ip) || get_user(v, ip + 1))
+ return -EFAULT;
+
+ return dev->setkeycode(dev, t, v);
+
+ case EVIOCSFF:
+ if (copy_from_user(&effect, p, sizeof(effect)))
+ return -EFAULT;
+
+ error = input_ff_upload(dev, &effect, file);
+
+ if (put_user(effect.id, &(((struct ff_effect __user *)p)->id)))
+ return -EFAULT;
+
+ return error;
+
+ case EVIOCRMFF:
+ return input_ff_erase(dev, (int)(unsigned long) p, file);
+
+ case EVIOCGEFFECTS:
+ i = test_bit(EV_FF, dev->evbit) ?
+ dev->ff->max_effects : 0;
+ if (put_user(i, ip))
+ return -EFAULT;
+ return 0;
+
+ case EVIOCGRAB:
+ if (p)
+ return evdev_grab(evdev, client);
+ else
+ return evdev_ungrab(evdev, client);
+
+ default:
+
+ if (_IOC_TYPE(cmd) != 'E')
+ return -EINVAL;
+
+ if (_IOC_DIR(cmd) == _IOC_READ) {
+
+ if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0, 0))) {
+
+ unsigned long *bits;
+ int len;
+
+ switch (_IOC_NR(cmd) & EV_MAX) {
+
+ case 0: bits = dev->evbit; len = EV_MAX; break;
+ case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
+ case EV_REL: bits = dev->relbit; len = REL_MAX; break;
+ case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
+ case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
+ case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
+ case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
+ case EV_FF: bits = dev->ffbit; len = FF_MAX; break;
+ case EV_SW: bits = dev->swbit; len = SW_MAX; break;
+ default: return -EINVAL;
+ }
+ return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode);
+ }
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0)))
+ return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd),
+ p, compat_mode);
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0)))
+ return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd),
+ p, compat_mode);
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
+ return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd),
+ p, compat_mode);
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0)))
+ return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd),
+ p, compat_mode);
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0)))
+ return str_to_user(dev->name, _IOC_SIZE(cmd), p);
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0)))
+ return str_to_user(dev->phys, _IOC_SIZE(cmd), p);
+
+ if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0)))
+ return str_to_user(dev->uniq, _IOC_SIZE(cmd), p);
+
+ if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+
+ t = _IOC_NR(cmd) & ABS_MAX;
+
+ abs.value = dev->abs[t];
+ abs.minimum = dev->absmin[t];
+ abs.maximum = dev->absmax[t];
+ abs.fuzz = dev->absfuzz[t];
+ abs.flat = dev->absflat[t];
+
+ if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+ return -EFAULT;
+
return 0;
}
- default:
+ }
- if (_IOC_TYPE(cmd) != 'E')
- return -EINVAL;
+ if (_IOC_DIR(cmd) == _IOC_WRITE) {
- if (_IOC_DIR(cmd) == _IOC_READ) {
+ if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
- if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
+ t = _IOC_NR(cmd) & ABS_MAX;
- unsigned long *bits;
- int len;
+ if (copy_from_user(&abs, p,
+ sizeof(struct input_absinfo)))
+ return -EFAULT;
- switch (_IOC_NR(cmd) & EV_MAX) {
- case 0: bits = dev->evbit; len = EV_MAX; break;
- case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
- case EV_REL: bits = dev->relbit; len = REL_MAX; break;
- case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
- case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
- case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
- case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
- case EV_FF: bits = dev->ffbit; len = FF_MAX; break;
- case EV_SW: bits = dev->swbit; len = SW_MAX; break;
- default: return -EINVAL;
- }
- return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode);
- }
+ /*
+ * Take event lock to ensure that we are not
+ * changing device parameters in the middle
+ * of event.
+ */
+ spin_lock_irq(&dev->event_lock);
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0)))
- return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd),
- p, compat_mode);
+ dev->abs[t] = abs.value;
+ dev->absmin[t] = abs.minimum;
+ dev->absmax[t] = abs.maximum;
+ dev->absfuzz[t] = abs.fuzz;
+ dev->absflat[t] = abs.flat;
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0)))
- return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd),
- p, compat_mode);
+ spin_unlock_irq(&dev->event_lock);
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
- return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd),
- p, compat_mode);
-
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0)))
- return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd),
- p, compat_mode);
-
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0)))
- return str_to_user(dev->name, _IOC_SIZE(cmd), p);
-
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0)))
- return str_to_user(dev->phys, _IOC_SIZE(cmd), p);
-
- if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0)))
- return str_to_user(dev->uniq, _IOC_SIZE(cmd), p);
-
- if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
-
- t = _IOC_NR(cmd) & ABS_MAX;
-
- abs.value = dev->abs[t];
- abs.minimum = dev->absmin[t];
- abs.maximum = dev->absmax[t];
- abs.fuzz = dev->absfuzz[t];
- abs.flat = dev->absflat[t];
-
- if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
- return -EFAULT;
-
- return 0;
- }
-
+ return 0;
}
-
- if (_IOC_DIR(cmd) == _IOC_WRITE) {
-
- if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
-
- t = _IOC_NR(cmd) & ABS_MAX;
-
- if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
- return -EFAULT;
-
- dev->abs[t] = abs.value;
- dev->absmin[t] = abs.minimum;
- dev->absmax[t] = abs.maximum;
- dev->absfuzz[t] = abs.fuzz;
- dev->absflat[t] = abs.flat;
-
- return 0;
- }
- }
+ }
}
return -EINVAL;
}
+static long evdev_ioctl_handler(struct file *file, unsigned int cmd,
+ void __user *p, int compat_mode)
+{
+ struct evdev_client *client = file->private_data;
+ struct evdev *evdev = client->evdev;
+ int retval;
+
+ retval = mutex_lock_interruptible(&evdev->mutex);
+ if (retval)
+ return retval;
+
+ if (!evdev->exist) {
+ retval = -ENODEV;
+ goto out;
+ }
+
+ retval = evdev_do_ioctl(file, cmd, p, compat_mode);
+
+ out:
+ mutex_unlock(&evdev->mutex);
+ return retval;
+}
+
static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
return evdev_ioctl_handler(file, cmd, (void __user *)arg, 0);
}
#ifdef CONFIG_COMPAT
-static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
+static long evdev_ioctl_compat(struct file *file,
+ unsigned int cmd, unsigned long arg)
{
return evdev_ioctl_handler(file, cmd, compat_ptr(arg), 1);
}
#endif
static const struct file_operations evdev_fops = {
- .owner = THIS_MODULE,
- .read = evdev_read,
- .write = evdev_write,
- .poll = evdev_poll,
- .open = evdev_open,
- .release = evdev_release,
- .unlocked_ioctl = evdev_ioctl,
+ .owner = THIS_MODULE,
+ .read = evdev_read,
+ .write = evdev_write,
+ .poll = evdev_poll,
+ .open = evdev_open,
+ .release = evdev_release,
+ .unlocked_ioctl = evdev_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = evdev_ioctl_compat,
+ .compat_ioctl = evdev_ioctl_compat,
#endif
- .fasync = evdev_fasync,
- .flush = evdev_flush
+ .fasync = evdev_fasync,
+ .flush = evdev_flush
};
+static int evdev_install_chrdev(struct evdev *evdev)
+{
+ /*
+ * No need to do any locking here as calls to connect and
+ * disconnect are serialized by the input core
+ */
+ evdev_table[evdev->minor] = evdev;
+ return 0;
+}
+
+static void evdev_remove_chrdev(struct evdev *evdev)
+{
+ /*
+ * Lock evdev table to prevent race with evdev_open()
+ */
+ mutex_lock(&evdev_table_mutex);
+ evdev_table[evdev->minor] = NULL;
+ mutex_unlock(&evdev_table_mutex);
+}
+
+/*
+ * Mark device non-existent. This disables writes, ioctls and
+ * prevents new users from opening the device. Already posted
+ * blocking reads will stay, however new ones will fail.
+ */
+static void evdev_mark_dead(struct evdev *evdev)
+{
+ mutex_lock(&evdev->mutex);
+ evdev->exist = 0;
+ mutex_unlock(&evdev->mutex);
+}
+
+static void evdev_cleanup(struct evdev *evdev)
+{
+ struct input_handle *handle = &evdev->handle;
+
+ evdev_mark_dead(evdev);
+ evdev_hangup(evdev);
+ evdev_remove_chrdev(evdev);
+
+ /* evdev is marked dead so no one else accesses evdev->open */
+ if (evdev->open) {
+ input_flush_device(handle, NULL);
+ input_close_device(handle);
+ }
+}
+
+/*
+ * Create new evdev device. Note that input core serializes calls
+ * to connect and disconnect so we don't need to lock evdev_table here.
+ */
static int evdev_connect(struct input_handler *handler, struct input_dev *dev,
const struct input_device_id *id)
{
@@ -637,7 +871,10 @@
int minor;
int error;
- for (minor = 0; minor < EVDEV_MINORS && evdev_table[minor]; minor++);
+ for (minor = 0; minor < EVDEV_MINORS; minor++)
+ if (!evdev_table[minor])
+ break;
+
if (minor == EVDEV_MINORS) {
printk(KERN_ERR "evdev: no more free evdev devices\n");
return -ENFILE;
@@ -648,38 +885,44 @@
return -ENOMEM;
INIT_LIST_HEAD(&evdev->client_list);
+ spin_lock_init(&evdev->client_lock);
+ mutex_init(&evdev->mutex);
init_waitqueue_head(&evdev->wait);
+ snprintf(evdev->name, sizeof(evdev->name), "event%d", minor);
evdev->exist = 1;
evdev->minor = minor;
+
evdev->handle.dev = dev;
evdev->handle.name = evdev->name;
evdev->handle.handler = handler;
evdev->handle.private = evdev;
- snprintf(evdev->name, sizeof(evdev->name), "event%d", minor);
- snprintf(evdev->dev.bus_id, sizeof(evdev->dev.bus_id),
- "event%d", minor);
+ strlcpy(evdev->dev.bus_id, evdev->name, sizeof(evdev->dev.bus_id));
+ evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor);
evdev->dev.class = &input_class;
evdev->dev.parent = &dev->dev;
- evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor);
evdev->dev.release = evdev_free;
device_initialize(&evdev->dev);
- evdev_table[minor] = evdev;
-
- error = device_add(&evdev->dev);
+ error = input_register_handle(&evdev->handle);
if (error)
goto err_free_evdev;
- error = input_register_handle(&evdev->handle);
+ error = evdev_install_chrdev(evdev);
if (error)
- goto err_delete_evdev;
+ goto err_unregister_handle;
+
+ error = device_add(&evdev->dev);
+ if (error)
+ goto err_cleanup_evdev;
return 0;
- err_delete_evdev:
- device_del(&evdev->dev);
+ err_cleanup_evdev:
+ evdev_cleanup(evdev);
+ err_unregister_handle:
+ input_unregister_handle(&evdev->handle);
err_free_evdev:
put_device(&evdev->dev);
return error;
@@ -688,21 +931,10 @@
static void evdev_disconnect(struct input_handle *handle)
{
struct evdev *evdev = handle->private;
- struct evdev_client *client;
- input_unregister_handle(handle);
device_del(&evdev->dev);
-
- evdev->exist = 0;
-
- if (evdev->open) {
- input_flush_device(handle, NULL);
- input_close_device(handle);
- list_for_each_entry(client, &evdev->client_list, node)
- kill_fasync(&client->fasync, SIGIO, POLL_HUP);
- wake_up_interruptible(&evdev->wait);
- }
-
+ evdev_cleanup(evdev);
+ input_unregister_handle(handle);
put_device(&evdev->dev);
}
@@ -714,13 +946,13 @@
MODULE_DEVICE_TABLE(input, evdev_ids);
static struct input_handler evdev_handler = {
- .event = evdev_event,
- .connect = evdev_connect,
- .disconnect = evdev_disconnect,
- .fops = &evdev_fops,
- .minor = EVDEV_MINOR_BASE,
- .name = "evdev",
- .id_table = evdev_ids,
+ .event = evdev_event,
+ .connect = evdev_connect,
+ .disconnect = evdev_disconnect,
+ .fops = &evdev_fops,
+ .minor = EVDEV_MINOR_BASE,
+ .name = "evdev",
+ .id_table = evdev_ids,
};
static int __init evdev_init(void)
diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index b773d4c..92b3598 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -70,6 +70,7 @@
{
struct input_polled_dev *dev = input->private;
int error;
+ unsigned long ticks;
error = input_polldev_start_workqueue();
if (error)
@@ -78,8 +79,10 @@
if (dev->flush)
dev->flush(dev);
- queue_delayed_work(polldev_wq, &dev->work,
- msecs_to_jiffies(dev->poll_interval));
+ ticks = msecs_to_jiffies(dev->poll_interval);
+ if (ticks >= HZ)
+ ticks = round_jiffies(ticks);
+ queue_delayed_work(polldev_wq, &dev->work, ticks);
return 0;
}
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 5dc361c..2f2b020 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -17,10 +17,10 @@
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/device.h>
#include <linux/mutex.h>
+#include <linux/rcupdate.h>
MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
MODULE_DESCRIPTION("Input core");
@@ -31,8 +31,222 @@
static LIST_HEAD(input_dev_list);
static LIST_HEAD(input_handler_list);
+/*
+ * input_mutex protects access to both input_dev_list and input_handler_list.
+ * This also causes input_[un]register_device and input_[un]register_handler
+ * be mutually exclusive which simplifies locking in drivers implementing
+ * input handlers.
+ */
+static DEFINE_MUTEX(input_mutex);
+
static struct input_handler *input_table[8];
+static inline int is_event_supported(unsigned int code,
+ unsigned long *bm, unsigned int max)
+{
+ return code <= max && test_bit(code, bm);
+}
+
+static int input_defuzz_abs_event(int value, int old_val, int fuzz)
+{
+ if (fuzz) {
+ if (value > old_val - fuzz / 2 && value < old_val + fuzz / 2)
+ return old_val;
+
+ if (value > old_val - fuzz && value < old_val + fuzz)
+ return (old_val * 3 + value) / 4;
+
+ if (value > old_val - fuzz * 2 && value < old_val + fuzz * 2)
+ return (old_val + value) / 2;
+ }
+
+ return value;
+}
+
+/*
+ * Pass event through all open handles. This function is called with
+ * dev->event_lock held and interrupts disabled.
+ */
+static void input_pass_event(struct input_dev *dev,
+ unsigned int type, unsigned int code, int value)
+{
+ struct input_handle *handle;
+
+ rcu_read_lock();
+
+ handle = rcu_dereference(dev->grab);
+ if (handle)
+ handle->handler->event(handle, type, code, value);
+ else
+ list_for_each_entry_rcu(handle, &dev->h_list, d_node)
+ if (handle->open)
+ handle->handler->event(handle,
+ type, code, value);
+ rcu_read_unlock();
+}
+
+/*
+ * Generate software autorepeat event. Note that we take
+ * dev->event_lock here to avoid racing with input_event
+ * which may cause keys get "stuck".
+ */
+static void input_repeat_key(unsigned long data)
+{
+ struct input_dev *dev = (void *) data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+
+ if (test_bit(dev->repeat_key, dev->key) &&
+ is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) {
+
+ input_pass_event(dev, EV_KEY, dev->repeat_key, 2);
+
+ if (dev->sync) {
+ /*
+ * Only send SYN_REPORT if we are not in a middle
+ * of driver parsing a new hardware packet.
+ * Otherwise assume that the driver will send
+ * SYN_REPORT once it's done.
+ */
+ input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
+ }
+
+ if (dev->rep[REP_PERIOD])
+ mod_timer(&dev->timer, jiffies +
+ msecs_to_jiffies(dev->rep[REP_PERIOD]));
+ }
+
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+static void input_start_autorepeat(struct input_dev *dev, int code)
+{
+ if (test_bit(EV_REP, dev->evbit) &&
+ dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] &&
+ dev->timer.data) {
+ dev->repeat_key = code;
+ mod_timer(&dev->timer,
+ jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]));
+ }
+}
+
+#define INPUT_IGNORE_EVENT 0
+#define INPUT_PASS_TO_HANDLERS 1
+#define INPUT_PASS_TO_DEVICE 2
+#define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE)
+
+static void input_handle_event(struct input_dev *dev,
+ unsigned int type, unsigned int code, int value)
+{
+ int disposition = INPUT_IGNORE_EVENT;
+
+ switch (type) {
+
+ case EV_SYN:
+ switch (code) {
+ case SYN_CONFIG:
+ disposition = INPUT_PASS_TO_ALL;
+ break;
+
+ case SYN_REPORT:
+ if (!dev->sync) {
+ dev->sync = 1;
+ disposition = INPUT_PASS_TO_HANDLERS;
+ }
+ break;
+ }
+ break;
+
+ case EV_KEY:
+ if (is_event_supported(code, dev->keybit, KEY_MAX) &&
+ !!test_bit(code, dev->key) != value) {
+
+ if (value != 2) {
+ __change_bit(code, dev->key);
+ if (value)
+ input_start_autorepeat(dev, code);
+ }
+
+ disposition = INPUT_PASS_TO_HANDLERS;
+ }
+ break;
+
+ case EV_SW:
+ if (is_event_supported(code, dev->swbit, SW_MAX) &&
+ !!test_bit(code, dev->sw) != value) {
+
+ __change_bit(code, dev->sw);
+ disposition = INPUT_PASS_TO_HANDLERS;
+ }
+ break;
+
+ case EV_ABS:
+ if (is_event_supported(code, dev->absbit, ABS_MAX)) {
+
+ value = input_defuzz_abs_event(value,
+ dev->abs[code], dev->absfuzz[code]);
+
+ if (dev->abs[code] != value) {
+ dev->abs[code] = value;
+ disposition = INPUT_PASS_TO_HANDLERS;
+ }
+ }
+ break;
+
+ case EV_REL:
+ if (is_event_supported(code, dev->relbit, REL_MAX) && value)
+ disposition = INPUT_PASS_TO_HANDLERS;
+
+ break;
+
+ case EV_MSC:
+ if (is_event_supported(code, dev->mscbit, MSC_MAX))
+ disposition = INPUT_PASS_TO_ALL;
+
+ break;
+
+ case EV_LED:
+ if (is_event_supported(code, dev->ledbit, LED_MAX) &&
+ !!test_bit(code, dev->led) != value) {
+
+ __change_bit(code, dev->led);
+ disposition = INPUT_PASS_TO_ALL;
+ }
+ break;
+
+ case EV_SND:
+ if (is_event_supported(code, dev->sndbit, SND_MAX)) {
+
+ if (!!test_bit(code, dev->snd) != !!value)
+ __change_bit(code, dev->snd);
+ disposition = INPUT_PASS_TO_ALL;
+ }
+ break;
+
+ case EV_REP:
+ if (code <= REP_MAX && value >= 0 && dev->rep[code] != value) {
+ dev->rep[code] = value;
+ disposition = INPUT_PASS_TO_ALL;
+ }
+ break;
+
+ case EV_FF:
+ if (value >= 0)
+ disposition = INPUT_PASS_TO_ALL;
+ break;
+ }
+
+ if (type != EV_SYN)
+ dev->sync = 0;
+
+ if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event)
+ dev->event(dev, type, code, value);
+
+ if (disposition & INPUT_PASS_TO_HANDLERS)
+ input_pass_event(dev, type, code, value);
+}
+
/**
* input_event() - report new input event
* @dev: device that generated the event
@@ -40,158 +254,22 @@
* @code: event code
* @value: value of the event
*
- * This function should be used by drivers implementing various input devices
- * See also input_inject_event()
+ * This function should be used by drivers implementing various input
+ * devices. See also input_inject_event().
*/
-void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+
+void input_event(struct input_dev *dev,
+ unsigned int type, unsigned int code, int value)
{
- struct input_handle *handle;
+ unsigned long flags;
- if (type > EV_MAX || !test_bit(type, dev->evbit))
- return;
+ if (is_event_supported(type, dev->evbit, EV_MAX)) {
- add_input_randomness(type, code, value);
-
- switch (type) {
-
- case EV_SYN:
- switch (code) {
- case SYN_CONFIG:
- if (dev->event)
- dev->event(dev, type, code, value);
- break;
-
- case SYN_REPORT:
- if (dev->sync)
- return;
- dev->sync = 1;
- break;
- }
- break;
-
- case EV_KEY:
-
- if (code > KEY_MAX || !test_bit(code, dev->keybit) || !!test_bit(code, dev->key) == value)
- return;
-
- if (value == 2)
- break;
-
- change_bit(code, dev->key);
-
- if (test_bit(EV_REP, dev->evbit) && dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && dev->timer.data && value) {
- dev->repeat_key = code;
- mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]));
- }
-
- break;
-
- case EV_SW:
-
- if (code > SW_MAX || !test_bit(code, dev->swbit) || !!test_bit(code, dev->sw) == value)
- return;
-
- change_bit(code, dev->sw);
-
- break;
-
- case EV_ABS:
-
- if (code > ABS_MAX || !test_bit(code, dev->absbit))
- return;
-
- if (dev->absfuzz[code]) {
- if ((value > dev->abs[code] - (dev->absfuzz[code] >> 1)) &&
- (value < dev->abs[code] + (dev->absfuzz[code] >> 1)))
- return;
-
- if ((value > dev->abs[code] - dev->absfuzz[code]) &&
- (value < dev->abs[code] + dev->absfuzz[code]))
- value = (dev->abs[code] * 3 + value) >> 2;
-
- if ((value > dev->abs[code] - (dev->absfuzz[code] << 1)) &&
- (value < dev->abs[code] + (dev->absfuzz[code] << 1)))
- value = (dev->abs[code] + value) >> 1;
- }
-
- if (dev->abs[code] == value)
- return;
-
- dev->abs[code] = value;
- break;
-
- case EV_REL:
-
- if (code > REL_MAX || !test_bit(code, dev->relbit) || (value == 0))
- return;
-
- break;
-
- case EV_MSC:
-
- if (code > MSC_MAX || !test_bit(code, dev->mscbit))
- return;
-
- if (dev->event)
- dev->event(dev, type, code, value);
-
- break;
-
- case EV_LED:
-
- if (code > LED_MAX || !test_bit(code, dev->ledbit) || !!test_bit(code, dev->led) == value)
- return;
-
- change_bit(code, dev->led);
-
- if (dev->event)
- dev->event(dev, type, code, value);
-
- break;
-
- case EV_SND:
-
- if (code > SND_MAX || !test_bit(code, dev->sndbit))
- return;
-
- if (!!test_bit(code, dev->snd) != !!value)
- change_bit(code, dev->snd);
-
- if (dev->event)
- dev->event(dev, type, code, value);
-
- break;
-
- case EV_REP:
-
- if (code > REP_MAX || value < 0 || dev->rep[code] == value)
- return;
-
- dev->rep[code] = value;
- if (dev->event)
- dev->event(dev, type, code, value);
-
- break;
-
- case EV_FF:
-
- if (value < 0)
- return;
-
- if (dev->event)
- dev->event(dev, type, code, value);
- break;
+ spin_lock_irqsave(&dev->event_lock, flags);
+ add_input_randomness(type, code, value);
+ input_handle_event(dev, type, code, value);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
}
-
- if (type != EV_SYN)
- dev->sync = 0;
-
- if (dev->grab)
- dev->grab->handler->event(dev->grab, type, code, value);
- else
- list_for_each_entry(handle, &dev->h_list, d_node)
- if (handle->open)
- handle->handler->event(handle, type, code, value);
}
EXPORT_SYMBOL(input_event);
@@ -202,102 +280,228 @@
* @code: event code
* @value: value of the event
*
- * Similar to input_event() but will ignore event if device is "grabbed" and handle
- * injecting event is not the one that owns the device.
+ * Similar to input_event() but will ignore event if device is
+ * "grabbed" and handle injecting event is not the one that owns
+ * the device.
*/
-void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+void input_inject_event(struct input_handle *handle,
+ unsigned int type, unsigned int code, int value)
{
- if (!handle->dev->grab || handle->dev->grab == handle)
- input_event(handle->dev, type, code, value);
+ struct input_dev *dev = handle->dev;
+ struct input_handle *grab;
+ unsigned long flags;
+
+ if (is_event_supported(type, dev->evbit, EV_MAX)) {
+ spin_lock_irqsave(&dev->event_lock, flags);
+
+ rcu_read_lock();
+ grab = rcu_dereference(dev->grab);
+ if (!grab || grab == handle)
+ input_handle_event(dev, type, code, value);
+ rcu_read_unlock();
+
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
}
EXPORT_SYMBOL(input_inject_event);
-static void input_repeat_key(unsigned long data)
-{
- struct input_dev *dev = (void *) data;
-
- if (!test_bit(dev->repeat_key, dev->key))
- return;
-
- input_event(dev, EV_KEY, dev->repeat_key, 2);
- input_sync(dev);
-
- if (dev->rep[REP_PERIOD])
- mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_PERIOD]));
-}
-
+/**
+ * input_grab_device - grabs device for exclusive use
+ * @handle: input handle that wants to own the device
+ *
+ * When a device is grabbed by an input handle all events generated by
+ * the device are delivered only to this handle. Also events injected
+ * by other input handles are ignored while device is grabbed.
+ */
int input_grab_device(struct input_handle *handle)
{
- if (handle->dev->grab)
- return -EBUSY;
+ struct input_dev *dev = handle->dev;
+ int retval;
- handle->dev->grab = handle;
- return 0;
+ retval = mutex_lock_interruptible(&dev->mutex);
+ if (retval)
+ return retval;
+
+ if (dev->grab) {
+ retval = -EBUSY;
+ goto out;
+ }
+
+ rcu_assign_pointer(dev->grab, handle);
+ synchronize_rcu();
+
+ out:
+ mutex_unlock(&dev->mutex);
+ return retval;
}
EXPORT_SYMBOL(input_grab_device);
-void input_release_device(struct input_handle *handle)
+static void __input_release_device(struct input_handle *handle)
{
struct input_dev *dev = handle->dev;
if (dev->grab == handle) {
- dev->grab = NULL;
+ rcu_assign_pointer(dev->grab, NULL);
+ /* Make sure input_pass_event() notices that grab is gone */
+ synchronize_rcu();
list_for_each_entry(handle, &dev->h_list, d_node)
- if (handle->handler->start)
+ if (handle->open && handle->handler->start)
handle->handler->start(handle);
}
}
+
+/**
+ * input_release_device - release previously grabbed device
+ * @handle: input handle that owns the device
+ *
+ * Releases previously grabbed device so that other input handles can
+ * start receiving input events. Upon release all handlers attached
+ * to the device have their start() method called so they have a change
+ * to synchronize device state with the rest of the system.
+ */
+void input_release_device(struct input_handle *handle)
+{
+ struct input_dev *dev = handle->dev;
+
+ mutex_lock(&dev->mutex);
+ __input_release_device(handle);
+ mutex_unlock(&dev->mutex);
+}
EXPORT_SYMBOL(input_release_device);
+/**
+ * input_open_device - open input device
+ * @handle: handle through which device is being accessed
+ *
+ * This function should be called by input handlers when they
+ * want to start receive events from given input device.
+ */
int input_open_device(struct input_handle *handle)
{
struct input_dev *dev = handle->dev;
- int err;
+ int retval;
- err = mutex_lock_interruptible(&dev->mutex);
- if (err)
- return err;
+ retval = mutex_lock_interruptible(&dev->mutex);
+ if (retval)
+ return retval;
+
+ if (dev->going_away) {
+ retval = -ENODEV;
+ goto out;
+ }
handle->open++;
if (!dev->users++ && dev->open)
- err = dev->open(dev);
+ retval = dev->open(dev);
- if (err)
- handle->open--;
+ if (retval) {
+ dev->users--;
+ if (!--handle->open) {
+ /*
+ * Make sure we are not delivering any more events
+ * through this handle
+ */
+ synchronize_rcu();
+ }
+ }
+ out:
mutex_unlock(&dev->mutex);
-
- return err;
+ return retval;
}
EXPORT_SYMBOL(input_open_device);
-int input_flush_device(struct input_handle* handle, struct file* file)
+int input_flush_device(struct input_handle *handle, struct file *file)
{
- if (handle->dev->flush)
- return handle->dev->flush(handle->dev, file);
+ struct input_dev *dev = handle->dev;
+ int retval;
- return 0;
+ retval = mutex_lock_interruptible(&dev->mutex);
+ if (retval)
+ return retval;
+
+ if (dev->flush)
+ retval = dev->flush(dev, file);
+
+ mutex_unlock(&dev->mutex);
+ return retval;
}
EXPORT_SYMBOL(input_flush_device);
+/**
+ * input_close_device - close input device
+ * @handle: handle through which device is being accessed
+ *
+ * This function should be called by input handlers when they
+ * want to stop receive events from given input device.
+ */
void input_close_device(struct input_handle *handle)
{
struct input_dev *dev = handle->dev;
- input_release_device(handle);
-
mutex_lock(&dev->mutex);
+ __input_release_device(handle);
+
if (!--dev->users && dev->close)
dev->close(dev);
- handle->open--;
+
+ if (!--handle->open) {
+ /*
+ * synchronize_rcu() makes sure that input_pass_event()
+ * completed and that no more input events are delivered
+ * through this handle
+ */
+ synchronize_rcu();
+ }
mutex_unlock(&dev->mutex);
}
EXPORT_SYMBOL(input_close_device);
+/*
+ * Prepare device for unregistering
+ */
+static void input_disconnect_device(struct input_dev *dev)
+{
+ struct input_handle *handle;
+ int code;
+
+ /*
+ * Mark device as going away. Note that we take dev->mutex here
+ * not to protect access to dev->going_away but rather to ensure
+ * that there are no threads in the middle of input_open_device()
+ */
+ mutex_lock(&dev->mutex);
+ dev->going_away = 1;
+ mutex_unlock(&dev->mutex);
+
+ spin_lock_irq(&dev->event_lock);
+
+ /*
+ * Simulate keyup events for all pressed keys so that handlers
+ * are not left with "stuck" keys. The driver may continue
+ * generate events even after we done here but they will not
+ * reach any handlers.
+ */
+ if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) {
+ for (code = 0; code <= KEY_MAX; code++) {
+ if (is_event_supported(code, dev->keybit, KEY_MAX) &&
+ test_bit(code, dev->key)) {
+ input_pass_event(dev, EV_KEY, code, 0);
+ }
+ }
+ input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
+ }
+
+ list_for_each_entry(handle, &dev->h_list, d_node)
+ handle->open = 0;
+
+ spin_unlock_irq(&dev->event_lock);
+}
+
static int input_fetch_keycode(struct input_dev *dev, int scancode)
{
switch (dev->keycodesize) {
@@ -473,7 +677,8 @@
static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
{
- /* acquire lock here ... Yes, we do need locking, I knowi, I know... */
+ if (mutex_lock_interruptible(&input_mutex))
+ return NULL;
return seq_list_start(&input_dev_list, *pos);
}
@@ -485,7 +690,7 @@
static void input_devices_seq_stop(struct seq_file *seq, void *v)
{
- /* release lock here */
+ mutex_unlock(&input_mutex);
}
static void input_seq_print_bitmap(struct seq_file *seq, const char *name,
@@ -569,7 +774,9 @@
static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
{
- /* acquire lock here ... Yes, we do need locking, I knowi, I know... */
+ if (mutex_lock_interruptible(&input_mutex))
+ return NULL;
+
seq->private = (void *)(unsigned long)*pos;
return seq_list_start(&input_handler_list, *pos);
}
@@ -582,7 +789,7 @@
static void input_handlers_seq_stop(struct seq_file *seq, void *v)
{
- /* release lock here */
+ mutex_unlock(&input_mutex);
}
static int input_handlers_seq_show(struct seq_file *seq, void *v)
@@ -983,6 +1190,7 @@
dev->dev.class = &input_class;
device_initialize(&dev->dev);
mutex_init(&dev->mutex);
+ spin_lock_init(&dev->event_lock);
INIT_LIST_HEAD(&dev->h_list);
INIT_LIST_HEAD(&dev->node);
@@ -1000,7 +1208,7 @@
* This function should only be used if input_register_device()
* was not called yet or if it failed. Once device was registered
* use input_unregister_device() and memory will be freed once last
- * refrence to the device is dropped.
+ * reference to the device is dropped.
*
* Device should be allocated by input_allocate_device().
*
@@ -1070,6 +1278,18 @@
}
EXPORT_SYMBOL(input_set_capability);
+/**
+ * input_register_device - register device with input core
+ * @dev: device to be registered
+ *
+ * This function registers device with input core. The device must be
+ * allocated with input_allocate_device() and all it's capabilities
+ * set up before registering.
+ * If function fails the device must be freed with input_free_device().
+ * Once device has been successfully registered it can be unregistered
+ * with input_unregister_device(); input_free_device() should not be
+ * called in this case.
+ */
int input_register_device(struct input_dev *dev)
{
static atomic_t input_no = ATOMIC_INIT(0);
@@ -1077,7 +1297,7 @@
const char *path;
int error;
- set_bit(EV_SYN, dev->evbit);
+ __set_bit(EV_SYN, dev->evbit);
/*
* If delay and period are pre-set by the driver, then autorepeating
@@ -1098,8 +1318,6 @@
if (!dev->setkeycode)
dev->setkeycode = input_default_setkeycode;
- list_add_tail(&dev->node, &input_dev_list);
-
snprintf(dev->dev.bus_id, sizeof(dev->dev.bus_id),
"input%ld", (unsigned long) atomic_inc_return(&input_no) - 1);
@@ -1115,49 +1333,79 @@
dev->name ? dev->name : "Unspecified device", path ? path : "N/A");
kfree(path);
+ error = mutex_lock_interruptible(&input_mutex);
+ if (error) {
+ device_del(&dev->dev);
+ return error;
+ }
+
+ list_add_tail(&dev->node, &input_dev_list);
+
list_for_each_entry(handler, &input_handler_list, node)
input_attach_handler(dev, handler);
input_wakeup_procfs_readers();
+ mutex_unlock(&input_mutex);
+
return 0;
}
EXPORT_SYMBOL(input_register_device);
+/**
+ * input_unregister_device - unregister previously registered device
+ * @dev: device to be unregistered
+ *
+ * This function unregisters an input device. Once device is unregistered
+ * the caller should not try to access it as it may get freed at any moment.
+ */
void input_unregister_device(struct input_dev *dev)
{
struct input_handle *handle, *next;
- int code;
- for (code = 0; code <= KEY_MAX; code++)
- if (test_bit(code, dev->key))
- input_report_key(dev, code, 0);
- input_sync(dev);
+ input_disconnect_device(dev);
- del_timer_sync(&dev->timer);
+ mutex_lock(&input_mutex);
list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
handle->handler->disconnect(handle);
WARN_ON(!list_empty(&dev->h_list));
+ del_timer_sync(&dev->timer);
list_del_init(&dev->node);
- device_unregister(&dev->dev);
-
input_wakeup_procfs_readers();
+
+ mutex_unlock(&input_mutex);
+
+ device_unregister(&dev->dev);
}
EXPORT_SYMBOL(input_unregister_device);
+/**
+ * input_register_handler - register a new input handler
+ * @handler: handler to be registered
+ *
+ * This function registers a new input handler (interface) for input
+ * devices in the system and attaches it to all input devices that
+ * are compatible with the handler.
+ */
int input_register_handler(struct input_handler *handler)
{
struct input_dev *dev;
+ int retval;
+
+ retval = mutex_lock_interruptible(&input_mutex);
+ if (retval)
+ return retval;
INIT_LIST_HEAD(&handler->h_list);
if (handler->fops != NULL) {
- if (input_table[handler->minor >> 5])
- return -EBUSY;
-
+ if (input_table[handler->minor >> 5]) {
+ retval = -EBUSY;
+ goto out;
+ }
input_table[handler->minor >> 5] = handler;
}
@@ -1167,14 +1415,26 @@
input_attach_handler(dev, handler);
input_wakeup_procfs_readers();
- return 0;
+
+ out:
+ mutex_unlock(&input_mutex);
+ return retval;
}
EXPORT_SYMBOL(input_register_handler);
+/**
+ * input_unregister_handler - unregisters an input handler
+ * @handler: handler to be unregistered
+ *
+ * This function disconnects a handler from its input devices and
+ * removes it from lists of known handlers.
+ */
void input_unregister_handler(struct input_handler *handler)
{
struct input_handle *handle, *next;
+ mutex_lock(&input_mutex);
+
list_for_each_entry_safe(handle, next, &handler->h_list, h_node)
handler->disconnect(handle);
WARN_ON(!list_empty(&handler->h_list));
@@ -1185,14 +1445,45 @@
input_table[handler->minor >> 5] = NULL;
input_wakeup_procfs_readers();
+
+ mutex_unlock(&input_mutex);
}
EXPORT_SYMBOL(input_unregister_handler);
+/**
+ * input_register_handle - register a new input handle
+ * @handle: handle to register
+ *
+ * This function puts a new input handle onto device's
+ * and handler's lists so that events can flow through
+ * it once it is opened using input_open_device().
+ *
+ * This function is supposed to be called from handler's
+ * connect() method.
+ */
int input_register_handle(struct input_handle *handle)
{
struct input_handler *handler = handle->handler;
+ struct input_dev *dev = handle->dev;
+ int error;
- list_add_tail(&handle->d_node, &handle->dev->h_list);
+ /*
+ * We take dev->mutex here to prevent race with
+ * input_release_device().
+ */
+ error = mutex_lock_interruptible(&dev->mutex);
+ if (error)
+ return error;
+ list_add_tail_rcu(&handle->d_node, &dev->h_list);
+ mutex_unlock(&dev->mutex);
+ synchronize_rcu();
+
+ /*
+ * Since we are supposed to be called from ->connect()
+ * which is mutually exclusive with ->disconnect()
+ * we can't be racing with input_unregister_handle()
+ * and so separate lock is not needed here.
+ */
list_add_tail(&handle->h_node, &handler->h_list);
if (handler->start)
@@ -1202,10 +1493,29 @@
}
EXPORT_SYMBOL(input_register_handle);
+/**
+ * input_unregister_handle - unregister an input handle
+ * @handle: handle to unregister
+ *
+ * This function removes input handle from device's
+ * and handler's lists.
+ *
+ * This function is supposed to be called from handler's
+ * disconnect() method.
+ */
void input_unregister_handle(struct input_handle *handle)
{
+ struct input_dev *dev = handle->dev;
+
list_del_init(&handle->h_node);
- list_del_init(&handle->d_node);
+
+ /*
+ * Take dev->mutex to prevent race with input_release_device().
+ */
+ mutex_lock(&dev->mutex);
+ list_del_rcu(&handle->d_node);
+ mutex_unlock(&dev->mutex);
+ synchronize_rcu();
}
EXPORT_SYMBOL(input_unregister_handle);
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index a9a0180..2b201f9 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -43,6 +43,8 @@
struct input_handle handle;
wait_queue_head_t wait;
struct list_head client_list;
+ spinlock_t client_lock; /* protects client_list */
+ struct mutex mutex;
struct device dev;
struct js_corr corr[ABS_MAX + 1];
@@ -61,31 +63,61 @@
int head;
int tail;
int startup;
+ spinlock_t buffer_lock; /* protects access to buffer, head and tail */
struct fasync_struct *fasync;
struct joydev *joydev;
struct list_head node;
};
static struct joydev *joydev_table[JOYDEV_MINORS];
+static DEFINE_MUTEX(joydev_table_mutex);
static int joydev_correct(int value, struct js_corr *corr)
{
switch (corr->type) {
- case JS_CORR_NONE:
- break;
- case JS_CORR_BROKEN:
- value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 :
- ((corr->coef[3] * (value - corr->coef[1])) >> 14)) :
- ((corr->coef[2] * (value - corr->coef[0])) >> 14);
- break;
- default:
- return 0;
+
+ case JS_CORR_NONE:
+ break;
+
+ case JS_CORR_BROKEN:
+ value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 :
+ ((corr->coef[3] * (value - corr->coef[1])) >> 14)) :
+ ((corr->coef[2] * (value - corr->coef[0])) >> 14);
+ break;
+
+ default:
+ return 0;
}
return value < -32767 ? -32767 : (value > 32767 ? 32767 : value);
}
-static void joydev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+static void joydev_pass_event(struct joydev_client *client,
+ struct js_event *event)
+{
+ struct joydev *joydev = client->joydev;
+
+ /*
+ * IRQs already disabled, just acquire the lock
+ */
+ spin_lock(&client->buffer_lock);
+
+ client->buffer[client->head] = *event;
+
+ if (client->startup == joydev->nabs + joydev->nkey) {
+ client->head++;
+ client->head &= JOYDEV_BUFFER_SIZE - 1;
+ if (client->tail == client->head)
+ client->startup = 0;
+ }
+
+ spin_unlock(&client->buffer_lock);
+
+ kill_fasync(&client->fasync, SIGIO, POLL_IN);
+}
+
+static void joydev_event(struct input_handle *handle,
+ unsigned int type, unsigned int code, int value)
{
struct joydev *joydev = handle->private;
struct joydev_client *client;
@@ -93,39 +125,34 @@
switch (type) {
- case EV_KEY:
- if (code < BTN_MISC || value == 2)
- return;
- event.type = JS_EVENT_BUTTON;
- event.number = joydev->keymap[code - BTN_MISC];
- event.value = value;
- break;
-
- case EV_ABS:
- event.type = JS_EVENT_AXIS;
- event.number = joydev->absmap[code];
- event.value = joydev_correct(value, joydev->corr + event.number);
- if (event.value == joydev->abs[event.number])
- return;
- joydev->abs[event.number] = event.value;
- break;
-
- default:
+ case EV_KEY:
+ if (code < BTN_MISC || value == 2)
return;
+ event.type = JS_EVENT_BUTTON;
+ event.number = joydev->keymap[code - BTN_MISC];
+ event.value = value;
+ break;
+
+ case EV_ABS:
+ event.type = JS_EVENT_AXIS;
+ event.number = joydev->absmap[code];
+ event.value = joydev_correct(value,
+ &joydev->corr[event.number]);
+ if (event.value == joydev->abs[event.number])
+ return;
+ joydev->abs[event.number] = event.value;
+ break;
+
+ default:
+ return;
}
event.time = jiffies_to_msecs(jiffies);
- list_for_each_entry(client, &joydev->client_list, node) {
-
- memcpy(client->buffer + client->head, &event, sizeof(struct js_event));
-
- if (client->startup == joydev->nabs + joydev->nkey)
- if (client->tail == (client->head = (client->head + 1) & (JOYDEV_BUFFER_SIZE - 1)))
- client->startup = 0;
-
- kill_fasync(&client->fasync, SIGIO, POLL_IN);
- }
+ rcu_read_lock();
+ list_for_each_entry_rcu(client, &joydev->client_list, node)
+ joydev_pass_event(client, &event);
+ rcu_read_unlock();
wake_up_interruptible(&joydev->wait);
}
@@ -144,23 +171,83 @@
{
struct joydev *joydev = container_of(dev, struct joydev, dev);
- joydev_table[joydev->minor] = NULL;
kfree(joydev);
}
+static void joydev_attach_client(struct joydev *joydev,
+ struct joydev_client *client)
+{
+ spin_lock(&joydev->client_lock);
+ list_add_tail_rcu(&client->node, &joydev->client_list);
+ spin_unlock(&joydev->client_lock);
+ synchronize_rcu();
+}
+
+static void joydev_detach_client(struct joydev *joydev,
+ struct joydev_client *client)
+{
+ spin_lock(&joydev->client_lock);
+ list_del_rcu(&client->node);
+ spin_unlock(&joydev->client_lock);
+ synchronize_rcu();
+}
+
+static int joydev_open_device(struct joydev *joydev)
+{
+ int retval;
+
+ retval = mutex_lock_interruptible(&joydev->mutex);
+ if (retval)
+ return retval;
+
+ if (!joydev->exist)
+ retval = -ENODEV;
+ else if (!joydev->open++) {
+ retval = input_open_device(&joydev->handle);
+ if (retval)
+ joydev->open--;
+ }
+
+ mutex_unlock(&joydev->mutex);
+ return retval;
+}
+
+static void joydev_close_device(struct joydev *joydev)
+{
+ mutex_lock(&joydev->mutex);
+
+ if (joydev->exist && !--joydev->open)
+ input_close_device(&joydev->handle);
+
+ mutex_unlock(&joydev->mutex);
+}
+
+/*
+ * Wake up users waiting for IO so they can disconnect from
+ * dead device.
+ */
+static void joydev_hangup(struct joydev *joydev)
+{
+ struct joydev_client *client;
+
+ spin_lock(&joydev->client_lock);
+ list_for_each_entry(client, &joydev->client_list, node)
+ kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+ spin_unlock(&joydev->client_lock);
+
+ wake_up_interruptible(&joydev->wait);
+}
+
static int joydev_release(struct inode *inode, struct file *file)
{
struct joydev_client *client = file->private_data;
struct joydev *joydev = client->joydev;
joydev_fasync(-1, file, 0);
-
- list_del(&client->node);
+ joydev_detach_client(joydev, client);
kfree(client);
- if (!--joydev->open && joydev->exist)
- input_close_device(&joydev->handle);
-
+ joydev_close_device(joydev);
put_device(&joydev->dev);
return 0;
@@ -176,11 +263,16 @@
if (i >= JOYDEV_MINORS)
return -ENODEV;
+ error = mutex_lock_interruptible(&joydev_table_mutex);
+ if (error)
+ return error;
joydev = joydev_table[i];
- if (!joydev || !joydev->exist)
- return -ENODEV;
+ if (joydev)
+ get_device(&joydev->dev);
+ mutex_unlock(&joydev_table_mutex);
- get_device(&joydev->dev);
+ if (!joydev)
+ return -ENODEV;
client = kzalloc(sizeof(struct joydev_client), GFP_KERNEL);
if (!client) {
@@ -188,37 +280,129 @@
goto err_put_joydev;
}
+ spin_lock_init(&client->buffer_lock);
client->joydev = joydev;
- list_add_tail(&client->node, &joydev->client_list);
+ joydev_attach_client(joydev, client);
- if (!joydev->open++ && joydev->exist) {
- error = input_open_device(&joydev->handle);
- if (error)
- goto err_free_client;
- }
+ error = joydev_open_device(joydev);
+ if (error)
+ goto err_free_client;
file->private_data = client;
return 0;
err_free_client:
- list_del(&client->node);
+ joydev_detach_client(joydev, client);
kfree(client);
err_put_joydev:
put_device(&joydev->dev);
return error;
}
-static ssize_t joydev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+static int joydev_generate_startup_event(struct joydev_client *client,
+ struct input_dev *input,
+ struct js_event *event)
{
- return -EINVAL;
+ struct joydev *joydev = client->joydev;
+ int have_event;
+
+ spin_lock_irq(&client->buffer_lock);
+
+ have_event = client->startup < joydev->nabs + joydev->nkey;
+
+ if (have_event) {
+
+ event->time = jiffies_to_msecs(jiffies);
+ if (client->startup < joydev->nkey) {
+ event->type = JS_EVENT_BUTTON | JS_EVENT_INIT;
+ event->number = client->startup;
+ event->value = !!test_bit(joydev->keypam[event->number],
+ input->key);
+ } else {
+ event->type = JS_EVENT_AXIS | JS_EVENT_INIT;
+ event->number = client->startup - joydev->nkey;
+ event->value = joydev->abs[event->number];
+ }
+ client->startup++;
+ }
+
+ spin_unlock_irq(&client->buffer_lock);
+
+ return have_event;
}
-static ssize_t joydev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static int joydev_fetch_next_event(struct joydev_client *client,
+ struct js_event *event)
+{
+ int have_event;
+
+ spin_lock_irq(&client->buffer_lock);
+
+ have_event = client->head != client->tail;
+ if (have_event) {
+ *event = client->buffer[client->tail++];
+ client->tail &= JOYDEV_BUFFER_SIZE - 1;
+ }
+
+ spin_unlock_irq(&client->buffer_lock);
+
+ return have_event;
+}
+
+/*
+ * Old joystick interface
+ */
+static ssize_t joydev_0x_read(struct joydev_client *client,
+ struct input_dev *input,
+ char __user *buf)
+{
+ struct joydev *joydev = client->joydev;
+ struct JS_DATA_TYPE data;
+ int i;
+
+ spin_lock_irq(&input->event_lock);
+
+ /*
+ * Get device state
+ */
+ for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++)
+ data.buttons |=
+ test_bit(joydev->keypam[i], input->key) ? (1 << i) : 0;
+ data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x;
+ data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y;
+
+ /*
+ * Reset reader's event queue
+ */
+ spin_lock(&client->buffer_lock);
+ client->startup = 0;
+ client->tail = client->head;
+ spin_unlock(&client->buffer_lock);
+
+ spin_unlock_irq(&input->event_lock);
+
+ if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE)))
+ return -EFAULT;
+
+ return sizeof(struct JS_DATA_TYPE);
+}
+
+static inline int joydev_data_pending(struct joydev_client *client)
+{
+ struct joydev *joydev = client->joydev;
+
+ return client->startup < joydev->nabs + joydev->nkey ||
+ client->head != client->tail;
+}
+
+static ssize_t joydev_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
{
struct joydev_client *client = file->private_data;
struct joydev *joydev = client->joydev;
struct input_dev *input = joydev->handle.dev;
- int retval = 0;
+ struct js_event event;
+ int retval;
if (!joydev->exist)
return -ENODEV;
@@ -226,68 +410,35 @@
if (count < sizeof(struct js_event))
return -EINVAL;
- if (count == sizeof(struct JS_DATA_TYPE)) {
+ if (count == sizeof(struct JS_DATA_TYPE))
+ return joydev_0x_read(client, input, buf);
- struct JS_DATA_TYPE data;
- int i;
-
- for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++)
- data.buttons |= test_bit(joydev->keypam[i], input->key) ? (1 << i) : 0;
- data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x;
- data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y;
-
- if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE)))
- return -EFAULT;
-
- client->startup = 0;
- client->tail = client->head;
-
- return sizeof(struct JS_DATA_TYPE);
- }
-
- if (client->startup == joydev->nabs + joydev->nkey &&
- client->head == client->tail && (file->f_flags & O_NONBLOCK))
+ if (!joydev_data_pending(client) && (file->f_flags & O_NONBLOCK))
return -EAGAIN;
retval = wait_event_interruptible(joydev->wait,
- !joydev->exist ||
- client->startup < joydev->nabs + joydev->nkey ||
- client->head != client->tail);
+ !joydev->exist || joydev_data_pending(client));
if (retval)
return retval;
if (!joydev->exist)
return -ENODEV;
- while (client->startup < joydev->nabs + joydev->nkey && retval + sizeof(struct js_event) <= count) {
-
- struct js_event event;
-
- event.time = jiffies_to_msecs(jiffies);
-
- if (client->startup < joydev->nkey) {
- event.type = JS_EVENT_BUTTON | JS_EVENT_INIT;
- event.number = client->startup;
- event.value = !!test_bit(joydev->keypam[event.number], input->key);
- } else {
- event.type = JS_EVENT_AXIS | JS_EVENT_INIT;
- event.number = client->startup - joydev->nkey;
- event.value = joydev->abs[event.number];
- }
+ while (retval + sizeof(struct js_event) <= count &&
+ joydev_generate_startup_event(client, input, &event)) {
if (copy_to_user(buf + retval, &event, sizeof(struct js_event)))
return -EFAULT;
- client->startup++;
retval += sizeof(struct js_event);
}
- while (client->head != client->tail && retval + sizeof(struct js_event) <= count) {
+ while (retval + sizeof(struct js_event) <= count &&
+ joydev_fetch_next_event(client, &event)) {
- if (copy_to_user(buf + retval, client->buffer + client->tail, sizeof(struct js_event)))
+ if (copy_to_user(buf + retval, &event, sizeof(struct js_event)))
return -EFAULT;
- client->tail = (client->tail + 1) & (JOYDEV_BUFFER_SIZE - 1);
retval += sizeof(struct js_event);
}
@@ -301,126 +452,144 @@
struct joydev *joydev = client->joydev;
poll_wait(file, &joydev->wait, wait);
- return ((client->head != client->tail || client->startup < joydev->nabs + joydev->nkey) ?
- (POLLIN | POLLRDNORM) : 0) | (joydev->exist ? 0 : (POLLHUP | POLLERR));
+ return (joydev_data_pending(client) ? (POLLIN | POLLRDNORM) : 0) |
+ (joydev->exist ? 0 : (POLLHUP | POLLERR));
}
-static int joydev_ioctl_common(struct joydev *joydev, unsigned int cmd, void __user *argp)
+static int joydev_ioctl_common(struct joydev *joydev,
+ unsigned int cmd, void __user *argp)
{
struct input_dev *dev = joydev->handle.dev;
int i, j;
switch (cmd) {
- case JS_SET_CAL:
- return copy_from_user(&joydev->glue.JS_CORR, argp,
+ case JS_SET_CAL:
+ return copy_from_user(&joydev->glue.JS_CORR, argp,
sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
- case JS_GET_CAL:
- return copy_to_user(argp, &joydev->glue.JS_CORR,
+ case JS_GET_CAL:
+ return copy_to_user(argp, &joydev->glue.JS_CORR,
sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
- case JS_SET_TIMEOUT:
- return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
+ case JS_SET_TIMEOUT:
+ return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
- case JS_GET_TIMEOUT:
- return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
+ case JS_GET_TIMEOUT:
+ return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
- case JSIOCGVERSION:
- return put_user(JS_VERSION, (__u32 __user *) argp);
+ case JSIOCGVERSION:
+ return put_user(JS_VERSION, (__u32 __user *) argp);
- case JSIOCGAXES:
- return put_user(joydev->nabs, (__u8 __user *) argp);
+ case JSIOCGAXES:
+ return put_user(joydev->nabs, (__u8 __user *) argp);
- case JSIOCGBUTTONS:
- return put_user(joydev->nkey, (__u8 __user *) argp);
+ case JSIOCGBUTTONS:
+ return put_user(joydev->nkey, (__u8 __user *) argp);
- case JSIOCSCORR:
- if (copy_from_user(joydev->corr, argp,
- sizeof(joydev->corr[0]) * joydev->nabs))
- return -EFAULT;
- for (i = 0; i < joydev->nabs; i++) {
- j = joydev->abspam[i];
- joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i);
- }
- return 0;
+ case JSIOCSCORR:
+ if (copy_from_user(joydev->corr, argp,
+ sizeof(joydev->corr[0]) * joydev->nabs))
+ return -EFAULT;
- case JSIOCGCORR:
- return copy_to_user(argp, joydev->corr,
- sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0;
+ for (i = 0; i < joydev->nabs; i++) {
+ j = joydev->abspam[i];
+ joydev->abs[i] = joydev_correct(dev->abs[j],
+ &joydev->corr[i]);
+ }
+ return 0;
- case JSIOCSAXMAP:
- if (copy_from_user(joydev->abspam, argp, sizeof(__u8) * (ABS_MAX + 1)))
+ case JSIOCGCORR:
+ return copy_to_user(argp, joydev->corr,
+ sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0;
+
+ case JSIOCSAXMAP:
+ if (copy_from_user(joydev->abspam, argp,
+ sizeof(__u8) * (ABS_MAX + 1)))
+ return -EFAULT;
+
+ for (i = 0; i < joydev->nabs; i++) {
+ if (joydev->abspam[i] > ABS_MAX)
+ return -EINVAL;
+ joydev->absmap[joydev->abspam[i]] = i;
+ }
+ return 0;
+
+ case JSIOCGAXMAP:
+ return copy_to_user(argp, joydev->abspam,
+ sizeof(__u8) * (ABS_MAX + 1)) ? -EFAULT : 0;
+
+ case JSIOCSBTNMAP:
+ if (copy_from_user(joydev->keypam, argp,
+ sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)))
+ return -EFAULT;
+
+ for (i = 0; i < joydev->nkey; i++) {
+ if (joydev->keypam[i] > KEY_MAX ||
+ joydev->keypam[i] < BTN_MISC)
+ return -EINVAL;
+ joydev->keymap[joydev->keypam[i] - BTN_MISC] = i;
+ }
+
+ return 0;
+
+ case JSIOCGBTNMAP:
+ return copy_to_user(argp, joydev->keypam,
+ sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0;
+
+ default:
+ if ((cmd & ~IOCSIZE_MASK) == JSIOCGNAME(0)) {
+ int len;
+ if (!dev->name)
+ return 0;
+ len = strlen(dev->name) + 1;
+ if (len > _IOC_SIZE(cmd))
+ len = _IOC_SIZE(cmd);
+ if (copy_to_user(argp, dev->name, len))
return -EFAULT;
- for (i = 0; i < joydev->nabs; i++) {
- if (joydev->abspam[i] > ABS_MAX)
- return -EINVAL;
- joydev->absmap[joydev->abspam[i]] = i;
- }
- return 0;
-
- case JSIOCGAXMAP:
- return copy_to_user(argp, joydev->abspam,
- sizeof(__u8) * (ABS_MAX + 1)) ? -EFAULT : 0;
-
- case JSIOCSBTNMAP:
- if (copy_from_user(joydev->keypam, argp, sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)))
- return -EFAULT;
- for (i = 0; i < joydev->nkey; i++) {
- if (joydev->keypam[i] > KEY_MAX || joydev->keypam[i] < BTN_MISC)
- return -EINVAL;
- joydev->keymap[joydev->keypam[i] - BTN_MISC] = i;
- }
- return 0;
-
- case JSIOCGBTNMAP:
- return copy_to_user(argp, joydev->keypam,
- sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0;
-
- default:
- if ((cmd & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) == JSIOCGNAME(0)) {
- int len;
- if (!dev->name)
- return 0;
- len = strlen(dev->name) + 1;
- if (len > _IOC_SIZE(cmd))
- len = _IOC_SIZE(cmd);
- if (copy_to_user(argp, dev->name, len))
- return -EFAULT;
- return len;
- }
+ return len;
+ }
}
return -EINVAL;
}
#ifdef CONFIG_COMPAT
-static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long joydev_compat_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
{
struct joydev_client *client = file->private_data;
struct joydev *joydev = client->joydev;
void __user *argp = (void __user *)arg;
s32 tmp32;
struct JS_DATA_SAVE_TYPE_32 ds32;
- int err;
+ int retval;
- if (!joydev->exist)
- return -ENODEV;
+ retval = mutex_lock_interruptible(&joydev->mutex);
+ if (retval)
+ return retval;
- switch(cmd) {
+ if (!joydev->exist) {
+ retval = -ENODEV;
+ goto out;
+ }
+
+ switch (cmd) {
+
case JS_SET_TIMELIMIT:
- err = get_user(tmp32, (s32 __user *) arg);
- if (err == 0)
+ retval = get_user(tmp32, (s32 __user *) arg);
+ if (retval == 0)
joydev->glue.JS_TIMELIMIT = tmp32;
break;
+
case JS_GET_TIMELIMIT:
tmp32 = joydev->glue.JS_TIMELIMIT;
- err = put_user(tmp32, (s32 __user *) arg);
+ retval = put_user(tmp32, (s32 __user *) arg);
break;
case JS_SET_ALL:
- err = copy_from_user(&ds32, argp,
- sizeof(ds32)) ? -EFAULT : 0;
- if (err == 0) {
+ retval = copy_from_user(&ds32, argp,
+ sizeof(ds32)) ? -EFAULT : 0;
+ if (retval == 0) {
joydev->glue.JS_TIMEOUT = ds32.JS_TIMEOUT;
joydev->glue.BUSY = ds32.BUSY;
joydev->glue.JS_EXPIRETIME = ds32.JS_EXPIRETIME;
@@ -438,55 +607,119 @@
ds32.JS_SAVE = joydev->glue.JS_SAVE;
ds32.JS_CORR = joydev->glue.JS_CORR;
- err = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0;
+ retval = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0;
break;
default:
- err = joydev_ioctl_common(joydev, cmd, argp);
+ retval = joydev_ioctl_common(joydev, cmd, argp);
+ break;
}
- return err;
+
+ out:
+ mutex_unlock(&joydev->mutex);
+ return retval;
}
#endif /* CONFIG_COMPAT */
-static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long joydev_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
{
struct joydev_client *client = file->private_data;
struct joydev *joydev = client->joydev;
void __user *argp = (void __user *)arg;
+ int retval;
- if (!joydev->exist)
- return -ENODEV;
+ retval = mutex_lock_interruptible(&joydev->mutex);
+ if (retval)
+ return retval;
- switch(cmd) {
- case JS_SET_TIMELIMIT:
- return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
- case JS_GET_TIMELIMIT:
- return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
- case JS_SET_ALL:
- return copy_from_user(&joydev->glue, argp,
- sizeof(joydev->glue)) ? -EFAULT : 0;
- case JS_GET_ALL:
- return copy_to_user(argp, &joydev->glue,
- sizeof(joydev->glue)) ? -EFAULT : 0;
- default:
- return joydev_ioctl_common(joydev, cmd, argp);
+ if (!joydev->exist) {
+ retval = -ENODEV;
+ goto out;
}
+
+ switch (cmd) {
+
+ case JS_SET_TIMELIMIT:
+ retval = get_user(joydev->glue.JS_TIMELIMIT,
+ (long __user *) arg);
+ break;
+
+ case JS_GET_TIMELIMIT:
+ retval = put_user(joydev->glue.JS_TIMELIMIT,
+ (long __user *) arg);
+ break;
+
+ case JS_SET_ALL:
+ retval = copy_from_user(&joydev->glue, argp,
+ sizeof(joydev->glue)) ? -EFAULT: 0;
+ break;
+
+ case JS_GET_ALL:
+ retval = copy_to_user(argp, &joydev->glue,
+ sizeof(joydev->glue)) ? -EFAULT : 0;
+ break;
+
+ default:
+ retval = joydev_ioctl_common(joydev, cmd, argp);
+ break;
+ }
+ out:
+ mutex_unlock(&joydev->mutex);
+ return retval;
}
static const struct file_operations joydev_fops = {
- .owner = THIS_MODULE,
- .read = joydev_read,
- .write = joydev_write,
- .poll = joydev_poll,
- .open = joydev_open,
- .release = joydev_release,
- .ioctl = joydev_ioctl,
+ .owner = THIS_MODULE,
+ .read = joydev_read,
+ .poll = joydev_poll,
+ .open = joydev_open,
+ .release = joydev_release,
+ .unlocked_ioctl = joydev_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = joydev_compat_ioctl,
+ .compat_ioctl = joydev_compat_ioctl,
#endif
- .fasync = joydev_fasync,
+ .fasync = joydev_fasync,
};
+static int joydev_install_chrdev(struct joydev *joydev)
+{
+ joydev_table[joydev->minor] = joydev;
+ return 0;
+}
+
+static void joydev_remove_chrdev(struct joydev *joydev)
+{
+ mutex_lock(&joydev_table_mutex);
+ joydev_table[joydev->minor] = NULL;
+ mutex_unlock(&joydev_table_mutex);
+}
+
+/*
+ * Mark device non-existant. This disables writes, ioctls and
+ * prevents new users from opening the device. Already posted
+ * blocking reads will stay, however new ones will fail.
+ */
+static void joydev_mark_dead(struct joydev *joydev)
+{
+ mutex_lock(&joydev->mutex);
+ joydev->exist = 0;
+ mutex_unlock(&joydev->mutex);
+}
+
+static void joydev_cleanup(struct joydev *joydev)
+{
+ struct input_handle *handle = &joydev->handle;
+
+ joydev_mark_dead(joydev);
+ joydev_hangup(joydev);
+ joydev_remove_chrdev(joydev);
+
+ /* joydev is marked dead so noone else accesses joydev->open */
+ if (joydev->open)
+ input_close_device(handle);
+}
+
static int joydev_connect(struct input_handler *handler, struct input_dev *dev,
const struct input_device_id *id)
{
@@ -494,7 +727,10 @@
int i, j, t, minor;
int error;
- for (minor = 0; minor < JOYDEV_MINORS && joydev_table[minor]; minor++);
+ for (minor = 0; minor < JOYDEV_MINORS; minor++)
+ if (!joydev_table[minor])
+ break;
+
if (minor == JOYDEV_MINORS) {
printk(KERN_ERR "joydev: no more free joydev devices\n");
return -ENFILE;
@@ -505,15 +741,19 @@
return -ENOMEM;
INIT_LIST_HEAD(&joydev->client_list);
+ spin_lock_init(&joydev->client_lock);
+ mutex_init(&joydev->mutex);
init_waitqueue_head(&joydev->wait);
+ snprintf(joydev->name, sizeof(joydev->name), "js%d", minor);
+ joydev->exist = 1;
joydev->minor = minor;
+
joydev->exist = 1;
joydev->handle.dev = dev;
joydev->handle.name = joydev->name;
joydev->handle.handler = handler;
joydev->handle.private = joydev;
- snprintf(joydev->name, sizeof(joydev->name), "js%d", minor);
for (i = 0; i < ABS_MAX + 1; i++)
if (test_bit(i, dev->absbit)) {
@@ -545,67 +785,65 @@
}
joydev->corr[i].type = JS_CORR_BROKEN;
joydev->corr[i].prec = dev->absfuzz[j];
- joydev->corr[i].coef[0] = (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j];
- joydev->corr[i].coef[1] = (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j];
- if (!(t = ((dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j])))
- continue;
- joydev->corr[i].coef[2] = (1 << 29) / t;
- joydev->corr[i].coef[3] = (1 << 29) / t;
+ joydev->corr[i].coef[0] =
+ (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j];
+ joydev->corr[i].coef[1] =
+ (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j];
- joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i);
+ t = (dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j];
+ if (t) {
+ joydev->corr[i].coef[2] = (1 << 29) / t;
+ joydev->corr[i].coef[3] = (1 << 29) / t;
+
+ joydev->abs[i] = joydev_correct(dev->abs[j],
+ joydev->corr + i);
+ }
}
- snprintf(joydev->dev.bus_id, sizeof(joydev->dev.bus_id),
- "js%d", minor);
+ strlcpy(joydev->dev.bus_id, joydev->name, sizeof(joydev->dev.bus_id));
+ joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor);
joydev->dev.class = &input_class;
joydev->dev.parent = &dev->dev;
- joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor);
joydev->dev.release = joydev_free;
device_initialize(&joydev->dev);
- joydev_table[minor] = joydev;
-
- error = device_add(&joydev->dev);
+ error = input_register_handle(&joydev->handle);
if (error)
goto err_free_joydev;
- error = input_register_handle(&joydev->handle);
+ error = joydev_install_chrdev(joydev);
if (error)
- goto err_delete_joydev;
+ goto err_unregister_handle;
+
+ error = device_add(&joydev->dev);
+ if (error)
+ goto err_cleanup_joydev;
return 0;
- err_delete_joydev:
- device_del(&joydev->dev);
+ err_cleanup_joydev:
+ joydev_cleanup(joydev);
+ err_unregister_handle:
+ input_unregister_handle(&joydev->handle);
err_free_joydev:
put_device(&joydev->dev);
return error;
}
-
static void joydev_disconnect(struct input_handle *handle)
{
struct joydev *joydev = handle->private;
- struct joydev_client *client;
- input_unregister_handle(handle);
device_del(&joydev->dev);
-
- joydev->exist = 0;
-
- if (joydev->open) {
- input_close_device(handle);
- list_for_each_entry(client, &joydev->client_list, node)
- kill_fasync(&client->fasync, SIGIO, POLL_HUP);
- wake_up_interruptible(&joydev->wait);
- }
-
+ joydev_cleanup(joydev);
+ input_unregister_handle(handle);
put_device(&joydev->dev);
}
static const struct input_device_id joydev_blacklist[] = {
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_KEYBIT,
.evbit = { BIT(EV_KEY) },
.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
}, /* Avoid itouchpads, touchscreens and tablets */
@@ -614,17 +852,20 @@
static const struct input_device_id joydev_ids[] = {
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT(EV_ABS) },
.absbit = { BIT(ABS_X) },
},
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT(EV_ABS) },
.absbit = { BIT(ABS_WHEEL) },
},
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT(EV_ABS) },
.absbit = { BIT(ABS_THROTTLE) },
},
@@ -634,14 +875,14 @@
MODULE_DEVICE_TABLE(input, joydev_ids);
static struct input_handler joydev_handler = {
- .event = joydev_event,
- .connect = joydev_connect,
- .disconnect = joydev_disconnect,
- .fops = &joydev_fops,
- .minor = JOYDEV_MINOR_BASE,
- .name = "joydev",
- .id_table = joydev_ids,
- .blacklist = joydev_blacklist,
+ .event = joydev_event,
+ .connect = joydev_connect,
+ .disconnect = joydev_disconnect,
+ .fops = &joydev_fops,
+ .minor = JOYDEV_MINOR_BASE,
+ .name = "joydev",
+ .id_table = joydev_ids,
+ .blacklist = joydev_blacklist,
};
static int __init joydev_init(void)
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 2808039..623629a 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -223,12 +223,16 @@
struct input_dev *dev = xpad->dev;
/* left stick */
- input_report_abs(dev, ABS_X, (__s16) (((__s16)data[13] << 8) | data[12]));
- input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[15] << 8) | data[14]));
+ input_report_abs(dev, ABS_X,
+ (__s16) le16_to_cpup((__le16 *)(data + 12)));
+ input_report_abs(dev, ABS_Y,
+ (__s16) le16_to_cpup((__le16 *)(data + 14)));
/* right stick */
- input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[17] << 8) | data[16]));
- input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[19] << 8) | data[18]));
+ input_report_abs(dev, ABS_RX,
+ (__s16) le16_to_cpup((__le16 *)(data + 16)));
+ input_report_abs(dev, ABS_RY,
+ (__s16) le16_to_cpup((__le16 *)(data + 18)));
/* triggers left/right */
input_report_abs(dev, ABS_Z, data[10]);
@@ -236,8 +240,10 @@
/* digital pad */
if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) {
- input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04));
- input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01));
+ input_report_abs(dev, ABS_HAT0X,
+ !!(data[2] & 0x08) - !!(data[2] & 0x04));
+ input_report_abs(dev, ABS_HAT0Y,
+ !!(data[2] & 0x02) - !!(data[2] & 0x01));
} else /* xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS */ {
input_report_key(dev, BTN_LEFT, data[2] & 0x04);
input_report_key(dev, BTN_RIGHT, data[2] & 0x08);
@@ -274,14 +280,17 @@
* http://www.free60.org/wiki/Gamepad
*/
-static void xpad360_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
+static void xpad360_process_packet(struct usb_xpad *xpad,
+ u16 cmd, unsigned char *data)
{
struct input_dev *dev = xpad->dev;
/* digital pad */
if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) {
- input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04));
- input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01));
+ input_report_abs(dev, ABS_HAT0X,
+ !!(data[2] & 0x08) - !!(data[2] & 0x04));
+ input_report_abs(dev, ABS_HAT0Y,
+ !!(data[2] & 0x02) - !!(data[2] & 0x01));
} else if (xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS) {
/* dpad as buttons (right, left, down, up) */
input_report_key(dev, BTN_LEFT, data[2] & 0x04);
@@ -308,12 +317,16 @@
input_report_key(dev, BTN_MODE, data[3] & 0x04);
/* left stick */
- input_report_abs(dev, ABS_X, (__s16) (((__s16)data[7] << 8) | (__s16)data[6]));
- input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[9] << 8) | (__s16)data[8]));
+ input_report_abs(dev, ABS_X,
+ (__s16) le16_to_cpup((__le16 *)(data + 6)));
+ input_report_abs(dev, ABS_Y,
+ (__s16) le16_to_cpup((__le16 *)(data + 8)));
/* right stick */
- input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[11] << 8) | (__s16)data[10]));
- input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[13] << 8) | (__s16)data[12]));
+ input_report_abs(dev, ABS_RX,
+ (__s16) le16_to_cpup((__le16 *)(data + 10)));
+ input_report_abs(dev, ABS_RY,
+ (__s16) le16_to_cpup((__le16 *)(data + 12)));
/* triggers left/right */
input_report_abs(dev, ABS_Z, data[4]);
@@ -335,10 +348,12 @@
case -ENOENT:
case -ESHUTDOWN:
/* this urb is terminated, clean up */
- dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status);
+ dbg("%s - urb shutting down with status: %d",
+ __FUNCTION__, urb->status);
return;
default:
- dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status);
+ dbg("%s - nonzero urb status received: %d",
+ __FUNCTION__, urb->status);
goto exit;
}
@@ -367,10 +382,12 @@
case -ENOENT:
case -ESHUTDOWN:
/* this urb is terminated, clean up */
- dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status);
+ dbg("%s - urb shutting down with status: %d",
+ __FUNCTION__, urb->status);
return;
default:
- dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status);
+ dbg("%s - nonzero urb status received: %d",
+ __FUNCTION__, urb->status);
goto exit;
}
@@ -378,7 +395,7 @@
retval = usb_submit_urb(urb, GFP_ATOMIC);
if (retval)
err("%s - usb_submit_urb failed with result %d",
- __FUNCTION__, retval);
+ __FUNCTION__, retval);
}
static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
@@ -595,7 +612,7 @@
static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id)
{
- struct usb_device *udev = interface_to_usbdev (intf);
+ struct usb_device *udev = interface_to_usbdev(intf);
struct usb_xpad *xpad;
struct input_dev *input_dev;
struct usb_endpoint_descriptor *ep_irq_in;
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index c97d5eb..2316a01 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -208,6 +208,27 @@
This driver implements support for HIL-keyboards attached
to your machine, so normally you should say Y here.
+config KEYBOARD_HP6XX
+ tristate "HP Jornada 6XX Keyboard support"
+ depends on SH_HP6XX
+ select INPUT_POLLDEV
+ help
+ This adds support for the onboard keyboard found on
+ HP Jornada 620/660/680/690.
+
+ To compile this driver as a module, choose M here: the
+ module will be called jornada680_kbd.
+
+config KEYBOARD_HP7XX
+ tristate "HP Jornada 7XX Keyboard Driver"
+ depends on SA1100_JORNADA720_SSP && SA1100_SSP
+ help
+ Say Y here to add support for the HP Jornada 7xx (710/720/728)
+ onboard keyboard.
+
+ To compile this driver as a module, choose M here: the
+ module will be called jornada720_kbd.
+
config KEYBOARD_OMAP
tristate "TI OMAP keypad support"
depends on (ARCH_OMAP1 || ARCH_OMAP2)
@@ -253,4 +274,23 @@
To compile this driver as a module, choose M here: the
module will be called gpio-keys.
+config KEYBOARD_MAPLE
+ tristate "Maple bus keyboard"
+ depends on SH_DREAMCAST && MAPLE
+ help
+ Say Y here if you have a Dreamcast console running Linux and have
+ a keyboard attached to its Maple bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called maple_keyb.
+
+config KEYBOARD_BFIN
+ tristate "Blackfin BF54x keypad support"
+ depends on BF54x
+ help
+ Say Y here if you want to use the BF54x keypad.
+
+ To compile this driver as a module, choose M here: the
+ module will be called bf54x-keys.
+
endif
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 28d211b..e97455f 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -21,4 +21,7 @@
obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keyboard.o
obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o
obj-$(CONFIG_KEYBOARD_GPIO) += gpio_keys.o
-
+obj-$(CONFIG_KEYBOARD_HP6XX) += jornada680_kbd.o
+obj-$(CONFIG_KEYBOARD_HP7XX) += jornada720_kbd.o
+obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o
+obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index f948d3a..a180015 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -217,7 +217,7 @@
static int __init atakbd_init(void)
{
- int i;
+ int i, error;
if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP))
return -EIO;
@@ -247,9 +247,10 @@
}
/* error check */
- if (input_register_device(atakbd_dev)) {
+ error = input_register_device(atakbd_dev);
+ if (error) {
input_free_device(atakbd_dev);
- return -ENOMEM;
+ return error;
}
atari_input_keyboard_interrupt_hook = atakbd_interrupt;
diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c
new file mode 100644
index 0000000..a67b29b
--- /dev/null
+++ b/drivers/input/keyboard/bf54x-keys.c
@@ -0,0 +1,382 @@
+/*
+ * File: drivers/input/keyboard/bf54x-keys.c
+ * Based on:
+ * Author: Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * Created:
+ * Description: keypad driver for Analog Devices Blackfin BF54x Processors
+ *
+ *
+ * Modified:
+ * Copyright 2007 Analog Devices Inc.
+ *
+ * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/pm.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/input.h>
+#include <linux/irq.h>
+
+#include <asm/portmux.h>
+#include <asm/mach/bf54x_keys.h>
+
+#define DRV_NAME "bf54x-keys"
+#define TIME_SCALE 100 /* 100 ns */
+#define MAX_MULT (0xFF * TIME_SCALE)
+#define MAX_RC 8 /* Max Row/Col */
+
+static const u16 per_rows[] = {
+ P_KEY_ROW7,
+ P_KEY_ROW6,
+ P_KEY_ROW5,
+ P_KEY_ROW4,
+ P_KEY_ROW3,
+ P_KEY_ROW2,
+ P_KEY_ROW1,
+ P_KEY_ROW0,
+ 0
+};
+
+static const u16 per_cols[] = {
+ P_KEY_COL7,
+ P_KEY_COL6,
+ P_KEY_COL5,
+ P_KEY_COL4,
+ P_KEY_COL3,
+ P_KEY_COL2,
+ P_KEY_COL1,
+ P_KEY_COL0,
+ 0
+};
+
+struct bf54x_kpad {
+ struct input_dev *input;
+ int irq;
+ unsigned short lastkey;
+ unsigned short *keycode;
+ struct timer_list timer;
+ unsigned int keyup_test_jiffies;
+};
+
+static inline int bfin_kpad_find_key(struct bf54x_kpad *bf54x_kpad,
+ struct input_dev *input, u16 keyident)
+{
+ u16 i;
+
+ for (i = 0; i < input->keycodemax; i++)
+ if (bf54x_kpad->keycode[i + input->keycodemax] == keyident)
+ return bf54x_kpad->keycode[i];
+ return -1;
+}
+
+static inline void bfin_keycodecpy(unsigned short *keycode,
+ const unsigned int *pdata_kc,
+ unsigned short keymapsize)
+{
+ unsigned int i;
+
+ for (i = 0; i < keymapsize; i++) {
+ keycode[i] = pdata_kc[i] & 0xffff;
+ keycode[i + keymapsize] = pdata_kc[i] >> 16;
+ }
+}
+
+static inline u16 bfin_kpad_get_prescale(u32 timescale)
+{
+ u32 sclk = get_sclk();
+
+ return ((((sclk / 1000) * timescale) / 1024) - 1);
+}
+
+static inline u16 bfin_kpad_get_keypressed(struct bf54x_kpad *bf54x_kpad)
+{
+ return (bfin_read_KPAD_STAT() & KPAD_PRESSED);
+}
+
+static inline void bfin_kpad_clear_irq(void)
+{
+ bfin_write_KPAD_STAT(0xFFFF);
+ bfin_write_KPAD_ROWCOL(0xFFFF);
+}
+
+static void bfin_kpad_timer(unsigned long data)
+{
+ struct platform_device *pdev = (struct platform_device *) data;
+ struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
+
+ if (bfin_kpad_get_keypressed(bf54x_kpad)) {
+ /* Try again later */
+ mod_timer(&bf54x_kpad->timer,
+ jiffies + bf54x_kpad->keyup_test_jiffies);
+ return;
+ }
+
+ input_report_key(bf54x_kpad->input, bf54x_kpad->lastkey, 0);
+ input_sync(bf54x_kpad->input);
+
+ /* Clear IRQ Status */
+
+ bfin_kpad_clear_irq();
+ enable_irq(bf54x_kpad->irq);
+}
+
+static irqreturn_t bfin_kpad_isr(int irq, void *dev_id)
+{
+ struct platform_device *pdev = dev_id;
+ struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
+ struct input_dev *input = bf54x_kpad->input;
+ int key;
+ u16 rowcol = bfin_read_KPAD_ROWCOL();
+
+ key = bfin_kpad_find_key(bf54x_kpad, input, rowcol);
+
+ input_report_key(input, key, 1);
+ input_sync(input);
+
+ if (bfin_kpad_get_keypressed(bf54x_kpad)) {
+ disable_irq(bf54x_kpad->irq);
+ bf54x_kpad->lastkey = key;
+ mod_timer(&bf54x_kpad->timer,
+ jiffies + bf54x_kpad->keyup_test_jiffies);
+ } else {
+ input_report_key(input, key, 0);
+ input_sync(input);
+
+ bfin_kpad_clear_irq();
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int __devinit bfin_kpad_probe(struct platform_device *pdev)
+{
+ struct bf54x_kpad *bf54x_kpad;
+ struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data;
+ struct input_dev *input;
+ int i, error;
+
+ if (!pdata->rows || !pdata->cols || !pdata->keymap) {
+ printk(KERN_ERR DRV_NAME
+ ": No rows, cols or keymap from pdata\n");
+ return -EINVAL;
+ }
+
+ if (!pdata->keymapsize ||
+ pdata->keymapsize > (pdata->rows * pdata->cols)) {
+ printk(KERN_ERR DRV_NAME ": Invalid keymapsize\n");
+ return -EINVAL;
+ }
+
+ bf54x_kpad = kzalloc(sizeof(struct bf54x_kpad), GFP_KERNEL);
+ if (!bf54x_kpad)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, bf54x_kpad);
+
+ /* Allocate memory for keymap followed by private LUT */
+ bf54x_kpad->keycode = kmalloc(pdata->keymapsize *
+ sizeof(unsigned short) * 2, GFP_KERNEL);
+ if (!bf54x_kpad->keycode) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ if (!pdata->debounce_time || !pdata->debounce_time > MAX_MULT ||
+ !pdata->coldrive_time || !pdata->coldrive_time > MAX_MULT) {
+ printk(KERN_ERR DRV_NAME
+ ": Invalid Debounce/Columdrive Time from pdata\n");
+ bfin_write_KPAD_MSEL(0xFF0); /* Default MSEL */
+ } else {
+ bfin_write_KPAD_MSEL(
+ ((pdata->debounce_time / TIME_SCALE)
+ & DBON_SCALE) |
+ (((pdata->coldrive_time / TIME_SCALE) << 8)
+ & COLDRV_SCALE));
+
+ }
+
+ if (!pdata->keyup_test_interval)
+ bf54x_kpad->keyup_test_jiffies = msecs_to_jiffies(50);
+ else
+ bf54x_kpad->keyup_test_jiffies =
+ msecs_to_jiffies(pdata->keyup_test_interval);
+
+ if (peripheral_request_list((u16 *)&per_rows[MAX_RC - pdata->rows],
+ DRV_NAME)) {
+ printk(KERN_ERR DRV_NAME
+ ": Requesting Peripherals failed\n");
+ error = -EFAULT;
+ goto out0;
+ }
+
+ if (peripheral_request_list((u16 *)&per_cols[MAX_RC - pdata->cols],
+ DRV_NAME)) {
+ printk(KERN_ERR DRV_NAME
+ ": Requesting Peripherals failed\n");
+ error = -EFAULT;
+ goto out1;
+ }
+
+ bf54x_kpad->irq = platform_get_irq(pdev, 0);
+ if (bf54x_kpad->irq < 0) {
+ error = -ENODEV;
+ goto out2;
+ }
+
+ error = request_irq(bf54x_kpad->irq, bfin_kpad_isr,
+ IRQF_SAMPLE_RANDOM, DRV_NAME, pdev);
+ if (error) {
+ printk(KERN_ERR DRV_NAME
+ ": unable to claim irq %d; error %d\n",
+ bf54x_kpad->irq, error);
+ error = -EBUSY;
+ goto out2;
+ }
+
+ input = input_allocate_device();
+ if (!input) {
+ error = -ENOMEM;
+ goto out3;
+ }
+
+ bf54x_kpad->input = input;
+
+ input->name = pdev->name;
+ input->phys = "bf54x-keys/input0";
+ input->dev.parent = &pdev->dev;
+
+ input_set_drvdata(input, bf54x_kpad);
+
+ input->id.bustype = BUS_HOST;
+ input->id.vendor = 0x0001;
+ input->id.product = 0x0001;
+ input->id.version = 0x0100;
+
+ input->keycodesize = sizeof(unsigned short);
+ input->keycodemax = pdata->keymapsize;
+ input->keycode = bf54x_kpad->keycode;
+
+ bfin_keycodecpy(bf54x_kpad->keycode, pdata->keymap, pdata->keymapsize);
+
+ /* setup input device */
+ __set_bit(EV_KEY, input->evbit);
+
+ if (pdata->repeat)
+ __set_bit(EV_REP, input->evbit);
+
+ for (i = 0; i < input->keycodemax; i++)
+ __set_bit(bf54x_kpad->keycode[i] & KEY_MAX, input->keybit);
+ __clear_bit(KEY_RESERVED, input->keybit);
+
+ error = input_register_device(input);
+ if (error) {
+ printk(KERN_ERR DRV_NAME
+ ": Unable to register input device (%d)\n", error);
+ goto out4;
+ }
+
+ /* Init Keypad Key Up/Release test timer */
+
+ setup_timer(&bf54x_kpad->timer, bfin_kpad_timer, (unsigned long) pdev);
+
+ bfin_write_KPAD_PRESCALE(bfin_kpad_get_prescale(TIME_SCALE));
+
+ bfin_write_KPAD_CTL((((pdata->cols - 1) << 13) & KPAD_COLEN) |
+ (((pdata->rows - 1) << 10) & KPAD_ROWEN) |
+ (2 & KPAD_IRQMODE));
+
+ bfin_write_KPAD_CTL(bfin_read_KPAD_CTL() | KPAD_EN);
+
+ printk(KERN_ERR DRV_NAME
+ ": Blackfin BF54x Keypad registered IRQ %d\n", bf54x_kpad->irq);
+
+ return 0;
+
+out4:
+ input_free_device(input);
+out3:
+ free_irq(bf54x_kpad->irq, pdev);
+out2:
+ peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]);
+out1:
+ peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]);
+out0:
+ kfree(bf54x_kpad->keycode);
+out:
+ kfree(bf54x_kpad);
+ platform_set_drvdata(pdev, NULL);
+
+ return error;
+}
+
+static int __devexit bfin_kpad_remove(struct platform_device *pdev)
+{
+ struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data;
+ struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev);
+
+ del_timer_sync(&bf54x_kpad->timer);
+ free_irq(bf54x_kpad->irq, pdev);
+
+ input_unregister_device(bf54x_kpad->input);
+
+ peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]);
+ peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]);
+
+ kfree(bf54x_kpad->keycode);
+ kfree(bf54x_kpad);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+struct platform_driver bfin_kpad_device_driver = {
+ .probe = bfin_kpad_probe,
+ .remove = __devexit_p(bfin_kpad_remove),
+ .driver = {
+ .name = DRV_NAME,
+ }
+};
+
+static int __init bfin_kpad_init(void)
+{
+ return platform_driver_register(&bfin_kpad_device_driver);
+}
+
+static void __exit bfin_kpad_exit(void)
+{
+ platform_driver_unregister(&bfin_kpad_device_driver);
+}
+
+module_init(bfin_kpad_init);
+module_exit(bfin_kpad_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Keypad driver for BF54x Processors");
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index f0b22b8..e2a3293 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -54,6 +54,7 @@
struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
struct input_dev *input;
int i, error;
+ int wakeup = 0;
input = input_allocate_device();
if (!input)
@@ -77,31 +78,51 @@
int irq = gpio_to_irq(button->gpio);
unsigned int type = button->type ?: EV_KEY;
- set_irq_type(irq, IRQ_TYPE_EDGE_BOTH);
- error = request_irq(irq, gpio_keys_isr, IRQF_SAMPLE_RANDOM,
- button->desc ? button->desc : "gpio_keys",
- pdev);
+ if (irq < 0) {
+ error = irq;
+ printk(KERN_ERR
+ "gpio-keys: "
+ "Unable to get irq number for GPIO %d,"
+ "error %d\n",
+ button->gpio, error);
+ goto fail;
+ }
+
+ error = request_irq(irq, gpio_keys_isr,
+ IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_RISING |
+ IRQF_TRIGGER_FALLING,
+ button->desc ? button->desc : "gpio_keys",
+ pdev);
if (error) {
- printk(KERN_ERR "gpio-keys: unable to claim irq %d; error %d\n",
+ printk(KERN_ERR
+ "gpio-keys: Unable to claim irq %d; error %d\n",
irq, error);
goto fail;
}
+ if (button->wakeup)
+ wakeup = 1;
+
input_set_capability(input, type, button->code);
}
error = input_register_device(input);
if (error) {
- printk(KERN_ERR "Unable to register gpio-keys input device\n");
+ printk(KERN_ERR
+ "gpio-keys: Unable to register input device, "
+ "error: %d\n", error);
goto fail;
}
+ device_init_wakeup(&pdev->dev, wakeup);
+
return 0;
fail:
- for (i = i - 1; i >= 0; i--)
+ while (--i >= 0)
free_irq(gpio_to_irq(pdata->buttons[i].gpio), pdev);
+ platform_set_drvdata(pdev, NULL);
input_free_device(input);
return error;
@@ -113,6 +134,8 @@
struct input_dev *input = platform_get_drvdata(pdev);
int i;
+ device_init_wakeup(&pdev->dev, 0);
+
for (i = 0; i < pdata->nbuttons; i++) {
int irq = gpio_to_irq(pdata->buttons[i].gpio);
free_irq(irq, pdev);
@@ -123,9 +146,53 @@
return 0;
}
+
+#ifdef CONFIG_PM
+static int gpio_keys_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+ int i;
+
+ if (device_may_wakeup(&pdev->dev)) {
+ for (i = 0; i < pdata->nbuttons; i++) {
+ struct gpio_keys_button *button = &pdata->buttons[i];
+ if (button->wakeup) {
+ int irq = gpio_to_irq(button->gpio);
+ enable_irq_wake(irq);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gpio_keys_resume(struct platform_device *pdev)
+{
+ struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+ int i;
+
+ if (device_may_wakeup(&pdev->dev)) {
+ for (i = 0; i < pdata->nbuttons; i++) {
+ struct gpio_keys_button *button = &pdata->buttons[i];
+ if (button->wakeup) {
+ int irq = gpio_to_irq(button->gpio);
+ disable_irq_wake(irq);
+ }
+ }
+ }
+
+ return 0;
+}
+#else
+#define gpio_keys_suspend NULL
+#define gpio_keys_resume NULL
+#endif
+
struct platform_driver gpio_keys_device_driver = {
.probe = gpio_keys_probe,
.remove = __devexit_p(gpio_keys_remove),
+ .suspend = gpio_keys_suspend,
+ .resume = gpio_keys_resume,
.driver = {
.name = "gpio-keys",
}
diff --git a/drivers/input/keyboard/jornada680_kbd.c b/drivers/input/keyboard/jornada680_kbd.c
new file mode 100644
index 0000000..bec1cf4
--- /dev/null
+++ b/drivers/input/keyboard/jornada680_kbd.c
@@ -0,0 +1,277 @@
+/*
+ * drivers/input/keyboard/jornada680_kbd.c
+ *
+ * HP Jornada 620/660/680/690 scan keyboard platform driver
+ * Copyright (C) 2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
+ *
+ * Based on hp680_keyb.c
+ * Copyright (C) 2006 Paul Mundt
+ * Copyright (C) 2005 Andriy Skulysh
+ * Split from drivers/input/keyboard/hp600_keyb.c
+ * Copyright (C) 2000 Yaegashi Takeshi (hp6xx kbd scan routine and translation table)
+ * Copyright (C) 2000 Niibe Yutaka (HP620 Keyb translation table)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/input-polldev.h>
+#include <linux/jiffies.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+
+#define PCCR 0xa4000104
+#define PDCR 0xa4000106
+#define PECR 0xa4000108
+#define PFCR 0xa400010a
+#define PCDR 0xa4000124
+#define PDDR 0xa4000126
+#define PEDR 0xa4000128
+#define PFDR 0xa400012a
+#define PGDR 0xa400012c
+#define PHDR 0xa400012e
+#define PJDR 0xa4000130
+#define PKDR 0xa4000132
+#define PLDR 0xa4000134
+
+static const unsigned short jornada_scancodes[] = {
+/* PTD1 */ KEY_CAPSLOCK, KEY_MACRO, KEY_LEFTCTRL, 0, KEY_ESC, 0, 0, 0, /* 1 -> 8 */
+ KEY_F1, KEY_F2, KEY_F3, KEY_F8, KEY_F7, KEY_F2, KEY_F4, KEY_F5, /* 9 -> 16 */
+/* PTD5 */ KEY_SLASH, KEY_APOSTROPHE, KEY_ENTER, 0, KEY_Z, 0, 0, 0, /* 17 -> 24 */
+ KEY_X, KEY_C, KEY_V, KEY_DOT, KEY_COMMA, KEY_M, KEY_B, KEY_N, /* 25 -> 32 */
+/* PTD7 */ KEY_KP2, KEY_KP6, 0, 0, 0, 0, 0, 0, /* 33 -> 40 */
+ 0, 0, 0, KEY_KP4, 0, 0, KEY_LEFTALT, KEY_HANJA, /* 41 -> 48 */
+/* PTE0 */ 0, 0, 0, 0, KEY_FINANCE, 0, 0, 0, /* 49 -> 56 */
+ KEY_LEFTCTRL, 0, KEY_SPACE, KEY_KPDOT, KEY_VOLUMEUP, 249, 0, 0, /* 57 -> 64 */
+/* PTE1 */ KEY_SEMICOLON, KEY_RIGHTBRACE, KEY_BACKSLASH, 0, KEY_A, 0, 0, 0,/* 65 -> 72 */
+ KEY_S, KEY_D, KEY_F, KEY_L, KEY_K, KEY_J, KEY_G, KEY_H, /* 73 -> 80 */
+/* PTE3 */ KEY_KP8, KEY_LEFTMETA, KEY_RIGHTSHIFT, 0, KEY_TAB, 0, 0,0, /* 81 -> 88 */
+ 0, KEY_LEFTSHIFT, 0, 0, 0, 0, 0, 0, /* 89 -> 96 */
+/* PTE6 */ KEY_P, KEY_LEFTBRACE, KEY_BACKSPACE, 0, KEY_Q, 0, 0, 0, /* 97 -> 104 */
+ KEY_W, KEY_E, KEY_R, KEY_O, KEY_I, KEY_U, KEY_T, KEY_R, /* 105 -> 112 */
+/* PTE7 */ KEY_0, KEY_MINUS, KEY_EQUAL, 0, KEY_1, 0, 0, 0, /* 113 -> 120 */
+ KEY_2, KEY_3, KEY_4, KEY_9, KEY_8, KEY_7, KEY_5, KEY_6, /* 121 -> 128 */
+/* **** */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0
+};
+
+#define JORNADA_SCAN_SIZE 18
+
+struct jornadakbd {
+ struct input_polled_dev *poll_dev;
+ unsigned short keymap[ARRAY_SIZE(jornada_scancodes)];
+ unsigned char length;
+ unsigned char old_scan[JORNADA_SCAN_SIZE];
+ unsigned char new_scan[JORNADA_SCAN_SIZE];
+};
+
+static void jornada_parse_kbd(struct jornadakbd *jornadakbd)
+{
+ struct input_dev *input_dev = jornadakbd->poll_dev->input;
+ unsigned short *keymap = jornadakbd->keymap;
+ unsigned int sync_me = 0;
+ unsigned int i, j;
+
+ for (i = 0; i < JORNADA_SCAN_SIZE; i++) {
+ unsigned char new = jornadakbd->new_scan[i];
+ unsigned char old = jornadakbd->old_scan[i];
+ unsigned int xor = new ^ old;
+
+ if (xor == 0)
+ continue;
+
+ for (j = 0; j < 8; j++) {
+ unsigned int bit = 1 << j;
+ if (xor & bit) {
+ unsigned int scancode = (i << 3) + j;
+ input_event(input_dev,
+ EV_MSC, MSC_SCAN, scancode);
+ input_report_key(input_dev,
+ keymap[scancode],
+ !(new & bit));
+ sync_me = 1;
+ }
+ }
+ }
+
+ if (sync_me)
+ input_sync(input_dev);
+}
+
+static void jornada_scan_keyb(unsigned char *s)
+{
+ int i;
+ unsigned short ec_static, dc_static; /* = UINT16_t */
+ unsigned char matrix_switch[] = {
+ 0xfd, 0xff, /* PTD1 PD(1) */
+ 0xdf, 0xff, /* PTD5 PD(5) */
+ 0x7f, 0xff, /* PTD7 PD(7) */
+ 0xff, 0xfe, /* PTE0 PE(0) */
+ 0xff, 0xfd, /* PTE1 PE(1) */
+ 0xff, 0xf7, /* PTE3 PE(3) */
+ 0xff, 0xbf, /* PTE6 PE(6) */
+ 0xff, 0x7f, /* PTE7 PE(7) */
+ }, *t = matrix_switch;
+ /* PD(x) :
+ 1. 0xcc0c & (1~(1 << (2*(x)+1)))))
+ 2. (0xf0cf & 0xfffff) */
+ /* PE(x) :
+ 1. 0xcc0c & 0xffff
+ 2. 0xf0cf & (1~(1 << (2*(x)+1))))) */
+ unsigned short matrix_PDE[] = {
+ 0xcc04, 0xf0cf, /* PD(1) */
+ 0xc40c, 0xf0cf, /* PD(5) */
+ 0x4c0c, 0xf0cf, /* PD(7) */
+ 0xcc0c, 0xf0cd, /* PE(0) */
+ 0xcc0c, 0xf0c7, /* PE(1) */
+ 0xcc0c, 0xf04f, /* PE(3) */
+ 0xcc0c, 0xd0cf, /* PE(6) */
+ 0xcc0c, 0x70cf, /* PE(7) */
+ }, *y = matrix_PDE;
+
+ /* Save these control reg bits */
+ dc_static = (ctrl_inw(PDCR) & (~0xcc0c));
+ ec_static = (ctrl_inw(PECR) & (~0xf0cf));
+
+ for (i = 0; i < 8; i++) {
+ /* disable output for all but the one we want to scan */
+ ctrl_outw((dc_static | *y++), PDCR);
+ ctrl_outw((ec_static | *y++), PECR);
+ udelay(5);
+
+ /* Get scanline row */
+ ctrl_outb(*t++, PDDR);
+ ctrl_outb(*t++, PEDR);
+ udelay(50);
+
+ /* Read data */
+ *s++ = ctrl_inb(PCDR);
+ *s++ = ctrl_inb(PFDR);
+ }
+ /* Scan no lines */
+ ctrl_outb(0xff, PDDR);
+ ctrl_outb(0xff, PEDR);
+
+ /* Enable all scanlines */
+ ctrl_outw((dc_static | (0x5555 & 0xcc0c)),PDCR);
+ ctrl_outw((ec_static | (0x5555 & 0xf0cf)),PECR);
+
+ /* Ignore extra keys and events */
+ *s++ = ctrl_inb(PGDR);
+ *s++ = ctrl_inb(PHDR);
+}
+
+static void jornadakbd680_poll(struct input_polled_dev *dev)
+{
+ struct jornadakbd *jornadakbd = dev->private;
+
+ jornada_scan_keyb(jornadakbd->new_scan);
+ jornada_parse_kbd(jornadakbd);
+ memcpy(jornadakbd->old_scan, jornadakbd->new_scan, JORNADA_SCAN_SIZE);
+}
+
+static int __devinit jornada680kbd_probe(struct platform_device *pdev)
+{
+ struct jornadakbd *jornadakbd;
+ struct input_polled_dev *poll_dev;
+ struct input_dev *input_dev;
+ int i, error;
+
+ jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL);
+ if (!jornadakbd)
+ return -ENOMEM;
+
+ poll_dev = input_allocate_polled_device();
+ if (!poll_dev) {
+ error = -ENOMEM;
+ goto failed;
+ }
+
+ platform_set_drvdata(pdev, jornadakbd);
+
+ jornadakbd->poll_dev = poll_dev;
+
+ memcpy(jornadakbd->keymap, jornada_scancodes,
+ sizeof(jornadakbd->keymap));
+
+ poll_dev->private = jornadakbd;
+ poll_dev->poll = jornadakbd680_poll;
+ poll_dev->poll_interval = 50; /* msec */
+
+ input_dev = poll_dev->input;
+ input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+ input_dev->name = "HP Jornada 680 keyboard";
+ input_dev->phys = "jornadakbd/input0";
+ input_dev->keycode = jornadakbd->keymap;
+ input_dev->keycodesize = sizeof(unsigned short);
+ input_dev->keycodemax = ARRAY_SIZE(jornada_scancodes);
+ input_dev->dev.parent = &pdev->dev;
+ input_dev->id.bustype = BUS_HOST;
+
+ for (i = 0; i < 128; i++)
+ if (jornadakbd->keymap[i])
+ __set_bit(jornadakbd->keymap[i], input_dev->keybit);
+ __clear_bit(KEY_RESERVED, input_dev->keybit);
+
+ input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+
+ error = input_register_polled_device(jornadakbd->poll_dev);
+ if (error)
+ goto failed;
+
+ return 0;
+
+ failed:
+ printk(KERN_ERR "Jornadakbd: failed to register driver, error: %d\n",
+ error);
+ platform_set_drvdata(pdev, NULL);
+ input_free_polled_device(poll_dev);
+ kfree(jornadakbd);
+ return error;
+
+}
+
+static int __devexit jornada680kbd_remove(struct platform_device *pdev)
+{
+ struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
+
+ platform_set_drvdata(pdev, NULL);
+ input_unregister_polled_device(jornadakbd->poll_dev);
+ input_free_polled_device(jornadakbd->poll_dev);
+ kfree(jornadakbd);
+
+ return 0;
+}
+
+static struct platform_driver jornada680kbd_driver = {
+ .driver = {
+ .name = "jornada680_kbd",
+ },
+ .probe = jornada680kbd_probe,
+ .remove = __devexit_p(jornada680kbd_remove),
+};
+
+static int __init jornada680kbd_init(void)
+{
+ return platform_driver_register(&jornada680kbd_driver);
+}
+
+static void __exit jornada680kbd_exit(void)
+{
+ platform_driver_unregister(&jornada680kbd_driver);
+}
+
+module_init(jornada680kbd_init);
+module_exit(jornada680kbd_exit);
+
+MODULE_AUTHOR("Kristoffer Ericson <kristoffer.ericson@gmail.com>");
+MODULE_DESCRIPTION("HP Jornada 620/660/680/690 Keyboard Driver");
+MODULE_LICENSE("GPLv2");
diff --git a/drivers/input/keyboard/jornada720_kbd.c b/drivers/input/keyboard/jornada720_kbd.c
new file mode 100644
index 0000000..e6696b3
--- /dev/null
+++ b/drivers/input/keyboard/jornada720_kbd.c
@@ -0,0 +1,185 @@
+/*
+ * drivers/input/keyboard/jornada720_kbd.c
+ *
+ * HP Jornada 720 keyboard platform driver
+ *
+ * Copyright (C) 2006/2007 Kristoffer Ericson <Kristoffer.Ericson@Gmail.com>
+ *
+ * Copyright (C) 2006 jornada 720 kbd driver by
+ Filip Zyzniewsk <Filip.Zyzniewski@tefnet.plX
+ * based on (C) 2004 jornada 720 kbd driver by
+ Alex Lange <chicken@handhelds.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <asm/arch/jornada720.h>
+#include <asm/hardware.h>
+
+MODULE_AUTHOR("Kristoffer Ericson <Kristoffer.Ericson@gmail.com>");
+MODULE_DESCRIPTION("HP Jornada 710/720/728 keyboard driver");
+MODULE_LICENSE("GPLv2");
+
+static unsigned short jornada_std_keymap[128] = { /* ROW */
+ 0, KEY_ESC, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, /* #1 */
+ KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE, /* -> */
+ 0, KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, /* #2 */
+ KEY_0, KEY_MINUS, KEY_EQUAL,0, 0, 0, /* -> */
+ 0, KEY_Q, KEY_W, KEY_E, KEY_R, KEY_T, KEY_Y, KEY_U, KEY_I, KEY_O, /* #3 */
+ KEY_P, KEY_BACKSLASH, KEY_BACKSPACE, 0, 0, 0, /* -> */
+ 0, KEY_A, KEY_S, KEY_D, KEY_F, KEY_G, KEY_H, KEY_J, KEY_K, KEY_L, /* #4 */
+ KEY_SEMICOLON, KEY_LEFTBRACE, KEY_RIGHTBRACE, 0, 0, 0, /* -> */
+ 0, KEY_Z, KEY_X, KEY_C, KEY_V, KEY_B, KEY_N, KEY_M, KEY_COMMA, /* #5 */
+ KEY_DOT, KEY_KPMINUS, KEY_APOSTROPHE, KEY_ENTER, 0, 0,0, /* -> */
+ 0, KEY_TAB, 0, KEY_LEFTSHIFT, 0, KEY_APOSTROPHE, 0, 0, 0, 0, /* #6 */
+ KEY_UP, 0, KEY_RIGHTSHIFT, 0, 0, 0,0, 0, 0, 0, 0, KEY_LEFTALT, KEY_GRAVE, /* -> */
+ 0, 0, KEY_LEFT, KEY_DOWN, KEY_RIGHT, 0, 0, 0, 0,0, KEY_KPASTERISK, /* -> */
+ KEY_LEFTCTRL, 0, KEY_SPACE, 0, 0, 0, KEY_SLASH, KEY_DELETE, 0, 0, /* -> */
+ 0, 0, 0, KEY_POWER, /* -> */
+};
+
+struct jornadakbd {
+ unsigned short keymap[ARRAY_SIZE(jornada_std_keymap)];
+ struct input_dev *input;
+};
+
+static irqreturn_t jornada720_kbd_interrupt(int irq, void *dev_id)
+{
+ struct platform_device *pdev = dev_id;
+ struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
+ struct input_dev *input = jornadakbd->input;
+ u8 count, kbd_data, scan_code;
+
+ /* startup ssp with spinlock */
+ jornada_ssp_start();
+
+ if (jornada_ssp_inout(GETSCANKEYCODE) != TXDUMMY) {
+ printk(KERN_DEBUG
+ "jornada720_kbd: "
+ "GetKeycode command failed with ETIMEDOUT, "
+ "flushed bus\n");
+ } else {
+ /* How many keycodes are waiting for us? */
+ count = jornada_ssp_byte(TXDUMMY);
+
+ /* Lets drag them out one at a time */
+ while (count--) {
+ /* Exchange TxDummy for location (keymap[kbddata]) */
+ kbd_data = jornada_ssp_byte(TXDUMMY);
+ scan_code = kbd_data & 0x7f;
+
+ input_event(input, EV_MSC, MSC_SCAN, scan_code);
+ input_report_key(input, jornadakbd->keymap[scan_code],
+ !(kbd_data & 0x80));
+ input_sync(input);
+ }
+ }
+
+ /* release spinlock and turn off ssp */
+ jornada_ssp_end();
+
+ return IRQ_HANDLED;
+};
+
+static int __devinit jornada720_kbd_probe(struct platform_device *pdev)
+{
+ struct jornadakbd *jornadakbd;
+ struct input_dev *input_dev;
+ int i, err;
+
+ jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL);
+ input_dev = input_allocate_device();
+ if (!jornadakbd || !input_dev) {
+ err = -ENOMEM;
+ goto fail1;
+ }
+
+ platform_set_drvdata(pdev, jornadakbd);
+
+ memcpy(jornadakbd->keymap, jornada_std_keymap,
+ sizeof(jornada_std_keymap));
+ jornadakbd->input = input_dev;
+
+ input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+ input_dev->name = "HP Jornada 720 keyboard";
+ input_dev->phys = "jornadakbd/input0";
+ input_dev->keycode = jornadakbd->keymap;
+ input_dev->keycodesize = sizeof(unsigned short);
+ input_dev->keycodemax = ARRAY_SIZE(jornada_std_keymap);
+ input_dev->id.bustype = BUS_HOST;
+ input_dev->dev.parent = &pdev->dev;
+
+ for (i = 0; i < ARRAY_SIZE(jornadakbd->keymap); i++)
+ __set_bit(jornadakbd->keymap[i], input_dev->keybit);
+ __clear_bit(KEY_RESERVED, input_dev->keybit);
+
+ input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+
+ err = request_irq(IRQ_GPIO0,
+ jornada720_kbd_interrupt,
+ IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+ "jornadakbd", pdev);
+ if (err) {
+ printk(KERN_INFO "jornadakbd720_kbd: Unable to grab IRQ\n");
+ goto fail1;
+ }
+
+ err = input_register_device(jornadakbd->input);
+ if (err)
+ goto fail2;
+
+ return 0;
+
+ fail2: /* IRQ, DEVICE, MEMORY */
+ free_irq(IRQ_GPIO0, pdev);
+ fail1: /* DEVICE, MEMORY */
+ platform_set_drvdata(pdev, NULL);
+ input_free_device(input_dev);
+ kfree(jornadakbd);
+ return err;
+};
+
+static int __devexit jornada720_kbd_remove(struct platform_device *pdev)
+{
+ struct jornadakbd *jornadakbd = platform_get_drvdata(pdev);
+
+ free_irq(IRQ_GPIO0, pdev);
+ platform_set_drvdata(pdev, NULL);
+ input_unregister_device(jornadakbd->input);
+ kfree(jornadakbd);
+
+ return 0;
+}
+
+static struct platform_driver jornada720_kbd_driver = {
+ .driver = {
+ .name = "jornada720_kbd",
+ },
+ .probe = jornada720_kbd_probe,
+ .remove = __devexit_p(jornada720_kbd_remove),
+};
+
+static int __init jornada720_kbd_init(void)
+{
+ return platform_driver_register(&jornada720_kbd_driver);
+}
+
+static void __exit jornada720_kbd_exit(void)
+{
+ platform_driver_unregister(&jornada720_kbd_driver);
+}
+
+module_init(jornada720_kbd_init);
+module_exit(jornada720_kbd_exit);
diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c
new file mode 100644
index 0000000..2b40428
--- /dev/null
+++ b/drivers/input/keyboard/maple_keyb.c
@@ -0,0 +1,252 @@
+/*
+ * SEGA Dreamcast keyboard driver
+ * Based on drivers/usb/usbkbd.c
+ * Copyright YAEGASHI Takeshi, 2001
+ * Porting to 2.6 Copyright Adrian McMenamin, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/timer.h>
+#include <linux/maple.h>
+#include <asm/mach/maple.h>
+
+/* Very simple mutex to ensure proper cleanup */
+static DEFINE_MUTEX(maple_keyb_mutex);
+
+#define NR_SCANCODES 256
+
+MODULE_AUTHOR("YAEGASHI Takeshi, Adrian McMenamin");
+MODULE_DESCRIPTION("SEGA Dreamcast keyboard driver");
+MODULE_LICENSE("GPL");
+
+struct dc_kbd {
+ struct input_dev *dev;
+ unsigned short keycode[NR_SCANCODES];
+ unsigned char new[8];
+ unsigned char old[8];
+};
+
+static const unsigned short dc_kbd_keycode[NR_SCANCODES] = {
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_A, KEY_B, KEY_C, KEY_D,
+ KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J, KEY_K, KEY_L,
+ KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T,
+ KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z, KEY_1, KEY_2,
+ KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0,
+ KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS, KEY_EQUAL, KEY_LEFTBRACE,
+ KEY_RIGHTBRACE, KEY_BACKSLASH, KEY_BACKSLASH, KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA,
+ KEY_DOT, KEY_SLASH, KEY_CAPSLOCK, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6,
+ KEY_F7, KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_F12, KEY_SYSRQ,
+ KEY_SCROLLLOCK, KEY_PAUSE, KEY_INSERT, KEY_HOME, KEY_PAGEUP, KEY_DELETE,
+ KEY_END, KEY_PAGEDOWN, KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP,
+ KEY_NUMLOCK, KEY_KPSLASH, KEY_KPASTERISK, KEY_KPMINUS, KEY_KPPLUS, KEY_KPENTER, KEY_KP1, KEY_KP2,
+ KEY_KP3, KEY_KP4, KEY_KP5, KEY_KP6, KEY_KP7, KEY_KP8, KEY_KP9, KEY_KP0, KEY_KPDOT,
+ KEY_102ND, KEY_COMPOSE, KEY_POWER, KEY_KPEQUAL, KEY_F13, KEY_F14, KEY_F15,
+ KEY_F16, KEY_F17, KEY_F18, KEY_F19, KEY_F20,
+ KEY_F21, KEY_F22, KEY_F23, KEY_F24, KEY_OPEN, KEY_HELP, KEY_PROPS, KEY_FRONT,
+ KEY_STOP, KEY_AGAIN, KEY_UNDO, KEY_CUT, KEY_COPY, KEY_PASTE, KEY_FIND, KEY_MUTE,
+ KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_KPCOMMA, KEY_RESERVED, KEY_RO, KEY_KATAKANAHIRAGANA , KEY_YEN,
+ KEY_HENKAN, KEY_MUHENKAN, KEY_KPJPCOMMA, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_HANGEUL, KEY_HANJA, KEY_KATAKANA, KEY_HIRAGANA, KEY_ZENKAKUHANKAKU, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED,
+ KEY_LEFTCTRL, KEY_LEFTSHIFT, KEY_LEFTALT, KEY_LEFTMETA, KEY_RIGHTCTRL, KEY_RIGHTSHIFT, KEY_RIGHTALT, KEY_RIGHTMETA,
+ KEY_PLAYPAUSE, KEY_STOPCD, KEY_PREVIOUSSONG, KEY_NEXTSONG, KEY_EJECTCD, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE,
+ KEY_WWW, KEY_BACK, KEY_FORWARD, KEY_STOP, KEY_FIND, KEY_SCROLLUP, KEY_SCROLLDOWN, KEY_EDIT, KEY_SLEEP,
+ KEY_SCREENLOCK, KEY_REFRESH, KEY_CALC, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED
+};
+
+static void dc_scan_kbd(struct dc_kbd *kbd)
+{
+ struct input_dev *dev = kbd->dev;
+ void *ptr;
+ int code, keycode;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ code = i + 224;
+ keycode = kbd->keycode[code];
+ input_event(dev, EV_MSC, MSC_SCAN, code);
+ input_report_key(dev, keycode, (kbd->new[0] >> i) & 1);
+ }
+
+ for (i = 2; i < 8; i++) {
+ ptr = memchr(kbd->new + 2, kbd->old[i], 6);
+ code = kbd->old[i];
+ if (code > 3 && ptr == NULL) {
+ keycode = kbd->keycode[code];
+ if (keycode) {
+ input_event(dev, EV_MSC, MSC_SCAN, code);
+ input_report_key(dev, keycode, 0);
+ } else
+ printk(KERN_DEBUG "maple_keyb: "
+ "Unknown key (scancode %#x) released.",
+ code);
+ }
+ ptr = memchr(kbd->old + 2, kbd->new[i], 6);
+ code = kbd->new[i];
+ if (code > 3 && ptr) {
+ keycode = kbd->keycode[code];
+ if (keycode) {
+ input_event(dev, EV_MSC, MSC_SCAN, code);
+ input_report_key(dev, keycode, 1);
+ } else
+ printk(KERN_DEBUG "maple_keyb: "
+ "Unknown key (scancode %#x) pressed.",
+ code);
+ }
+ }
+ input_sync(dev);
+ memcpy(kbd->old, kbd->new, 8);
+}
+
+static void dc_kbd_callback(struct mapleq *mq)
+{
+ struct maple_device *mapledev = mq->dev;
+ struct dc_kbd *kbd = mapledev->private_data;
+ unsigned long *buf = mq->recvbuf;
+
+ /*
+ * We should always be getting the lock because the only
+ * time it may be locked if driver is in cleanup phase.
+ */
+ if (likely(mutex_trylock(&maple_keyb_mutex))) {
+
+ if (buf[1] == mapledev->function) {
+ memcpy(kbd->new, buf + 2, 8);
+ dc_scan_kbd(kbd);
+ }
+
+ mutex_unlock(&maple_keyb_mutex);
+ }
+}
+
+static int dc_kbd_connect(struct maple_device *mdev)
+{
+ int i, error;
+ struct dc_kbd *kbd;
+ struct input_dev *dev;
+
+ if (!(mdev->function & MAPLE_FUNC_KEYBOARD))
+ return -EINVAL;
+
+ kbd = kzalloc(sizeof(struct dc_kbd), GFP_KERNEL);
+ dev = input_allocate_device();
+ if (!kbd || !dev) {
+ error = -ENOMEM;
+ goto fail;
+ }
+
+ mdev->private_data = kbd;
+
+ kbd->dev = dev;
+ memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode));
+
+ dev->name = mdev->product_name;
+ dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+ dev->keycode = kbd->keycode;
+ dev->keycodesize = sizeof (unsigned short);
+ dev->keycodemax = ARRAY_SIZE(kbd->keycode);
+ dev->id.bustype = BUS_HOST;
+ dev->dev.parent = &mdev->dev;
+
+ for (i = 0; i < NR_SCANCODES; i++)
+ __set_bit(dc_kbd_keycode[i], dev->keybit);
+ __clear_bit(KEY_RESERVED, dev->keybit);
+
+ input_set_capability(dev, EV_MSC, MSC_SCAN);
+ input_set_drvdata(dev, kbd);
+
+ error = input_register_device(dev);
+ if (error)
+ goto fail;
+
+ /* Maple polling is locked to VBLANK - which may be just 50/s */
+ maple_getcond_callback(mdev, dc_kbd_callback, HZ/50, MAPLE_FUNC_KEYBOARD);
+ return 0;
+
+ fail:
+ input_free_device(dev);
+ kfree(kbd);
+ mdev->private_data = NULL;
+ return error;
+}
+
+static void dc_kbd_disconnect(struct maple_device *mdev)
+{
+ struct dc_kbd *kbd;
+
+ mutex_lock(&maple_keyb_mutex);
+
+ kbd = mdev->private_data;
+ mdev->private_data = NULL;
+ input_unregister_device(kbd->dev);
+ kfree(kbd);
+
+ mutex_unlock(&maple_keyb_mutex);
+}
+
+/* allow the keyboard to be used */
+static int probe_maple_kbd(struct device *dev)
+{
+ struct maple_device *mdev = to_maple_dev(dev);
+ struct maple_driver *mdrv = to_maple_driver(dev->driver);
+ int error;
+
+ error = dc_kbd_connect(mdev);
+ if (error)
+ return error;
+
+ mdev->driver = mdrv;
+ mdev->registered = 1;
+
+ return 0;
+}
+
+static struct maple_driver dc_kbd_driver = {
+ .function = MAPLE_FUNC_KEYBOARD,
+ .connect = dc_kbd_connect,
+ .disconnect = dc_kbd_disconnect,
+ .drv = {
+ .name = "Dreamcast_keyboard",
+ .probe = probe_maple_kbd,
+ },
+};
+
+static int __init dc_kbd_init(void)
+{
+ return maple_driver_register(&dc_kbd_driver.drv);
+}
+
+static void __exit dc_kbd_exit(void)
+{
+ driver_unregister(&dc_kbd_driver.drv);
+}
+
+module_init(dc_kbd_init);
+module_exit(dc_kbd_exit);
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c
index 3a22863..76f1969 100644
--- a/drivers/input/keyboard/omap-keypad.c
+++ b/drivers/input/keyboard/omap-keypad.c
@@ -233,7 +233,7 @@
omap_writew(0, OMAP_MPUIO_BASE + OMAP_MPUIO_KBD_MASKIT);
kp_cur_group = -1;
}
- }
+ }
}
static ssize_t omap_kp_enable_show(struct device *dev,
@@ -318,7 +318,7 @@
keymap = pdata->keymap;
if (pdata->rep)
- set_bit(EV_REP, input_dev->evbit);
+ __set_bit(EV_REP, input_dev->evbit);
if (pdata->delay)
omap_kp->delay = pdata->delay;
@@ -365,9 +365,9 @@
goto err2;
/* setup input device */
- set_bit(EV_KEY, input_dev->evbit);
+ __set_bit(EV_KEY, input_dev->evbit);
for (i = 0; keymap[i] != 0; i++)
- set_bit(keymap[i] & KEY_MAX, input_dev->keybit);
+ __set_bit(keymap[i] & KEY_MAX, input_dev->keybit);
input_dev->name = "omap-keypad";
input_dev->phys = "omap-keypad/input0";
input_dev->dev.parent = &pdev->dev;
@@ -377,10 +377,6 @@
input_dev->id.product = 0x0001;
input_dev->id.version = 0x0100;
- input_dev->keycode = keymap;
- input_dev->keycodesize = sizeof(unsigned int);
- input_dev->keycodemax = pdata->keymapsize;
-
ret = input_register_device(omap_kp->input);
if (ret < 0) {
printk(KERN_ERR "Unable to register omap-keypad input device\n");
@@ -403,15 +399,15 @@
} else {
for (irq_idx = 0; irq_idx < omap_kp->rows; irq_idx++) {
if (request_irq(OMAP_GPIO_IRQ(row_gpios[irq_idx]),
- omap_kp_interrupt,
+ omap_kp_interrupt,
IRQF_TRIGGER_FALLING,
- "omap-keypad", omap_kp) < 0)
+ "omap-keypad", omap_kp) < 0)
goto err5;
}
}
return 0;
err5:
- for (i = irq_idx-1; i >=0; i--)
+ for (i = irq_idx - 1; i >=0; i--)
free_irq(row_gpios[i], 0);
err4:
input_unregister_device(omap_kp->input);
@@ -440,9 +436,9 @@
if (cpu_is_omap24xx()) {
int i;
for (i = 0; i < omap_kp->cols; i++)
- omap_free_gpio(col_gpios[i]);
+ omap_free_gpio(col_gpios[i]);
for (i = 0; i < omap_kp->rows; i++) {
- omap_free_gpio(row_gpios[i]);
+ omap_free_gpio(row_gpios[i]);
free_irq(OMAP_GPIO_IRQ(row_gpios[i]), 0);
}
} else {
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 2c5f11a..64d70a9 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -48,11 +48,13 @@
{ { 0x63, 0x02, 0x50 }, 0xef, 0xef, ALPS_FW_BK_1 }, /* NEC Versa L320 */
{ { 0x63, 0x02, 0x64 }, 0xf8, 0xf8, 0 },
{ { 0x63, 0x03, 0xc8 }, 0xf8, 0xf8, ALPS_PASS }, /* Dell Latitude D800 */
+ { { 0x73, 0x00, 0x0a }, 0xf8, 0xf8, ALPS_DUALPOINT }, /* ThinkPad R61 8918-5QG */
{ { 0x73, 0x02, 0x0a }, 0xf8, 0xf8, 0 },
{ { 0x73, 0x02, 0x14 }, 0xf8, 0xf8, ALPS_FW_BK_2 }, /* Ahtec Laptop */
{ { 0x20, 0x02, 0x0e }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* XXX */
{ { 0x22, 0x02, 0x0a }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT },
{ { 0x22, 0x02, 0x14 }, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude D600 */
+ { { 0x73, 0x02, 0x50 }, 0xcf, 0xff, ALPS_FW_BK_1 } /* Dell Vostro 1400 */
};
/*
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index a1804bf..0117817 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -502,18 +502,23 @@
/* reset the accumulator on release */
memset(dev->xy_acc, 0, sizeof(dev->xy_acc));
+ }
- /* Geyser 3 will continue to send packets continually after
- the first touch unless reinitialised. Do so if it's been
- idle for a while in order to avoid waking the kernel up
- several hundred times a second */
- if (!key && atp_is_geyser_3(dev)) {
+ /* Geyser 3 will continue to send packets continually after
+ the first touch unless reinitialised. Do so if it's been
+ idle for a while in order to avoid waking the kernel up
+ several hundred times a second */
+
+ if (atp_is_geyser_3(dev)) {
+ if (!x && !y && !key) {
dev->idlecount++;
if (dev->idlecount == 10) {
dev->valid = 0;
schedule_work(&dev->work);
}
}
+ else
+ dev->idlecount = 0;
}
input_report_key(dev->input, BTN_LEFT, key);
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index 608674d..d7de4c5 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -97,6 +97,14 @@
.callback = lifebook_set_6byte_proto,
},
{
+ .ident = "CF-72",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "CF-72"),
+ },
+ .callback = lifebook_set_serio_phys,
+ .driver_data = "isa0060/serio3",
+ },
+ {
.ident = "Lifebook B142",
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook B142"),
@@ -282,7 +290,7 @@
int lifebook_init(struct psmouse *psmouse)
{
struct input_dev *dev1 = psmouse->dev;
- int max_coord = lifebook_use_6byte_proto ? 1024 : 4096;
+ int max_coord = lifebook_use_6byte_proto ? 4096 : 1024;
if (lifebook_absolute_mode(psmouse))
return -1;
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index b9f0fb2..0735257 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -648,9 +648,10 @@
/*
* Reset to defaults in case the device got confused by extended
- * protocol probes. Note that we do full reset becuase some mice
- * put themselves to sleep when see PSMOUSE_RESET_DIS.
+ * protocol probes. Note that we follow up with full reset because
+ * some mice put themselves to sleep when they see PSMOUSE_RESET_DIS.
*/
+ ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS);
psmouse_reset(psmouse);
if (max_proto >= PSMOUSE_IMEX && im_explorer_detect(psmouse, set_properties) == 0)
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index 9173916..79146d6 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -61,9 +61,11 @@
int open;
int minor;
char name[16];
+ struct input_handle handle;
wait_queue_head_t wait;
struct list_head client_list;
- struct input_handle handle;
+ spinlock_t client_lock; /* protects client_list */
+ struct mutex mutex;
struct device dev;
struct list_head mixdev_node;
@@ -113,108 +115,137 @@
static struct input_handler mousedev_handler;
static struct mousedev *mousedev_table[MOUSEDEV_MINORS];
+static DEFINE_MUTEX(mousedev_table_mutex);
static struct mousedev *mousedev_mix;
static LIST_HEAD(mousedev_mix_list);
+static void mixdev_open_devices(void);
+static void mixdev_close_devices(void);
+
#define fx(i) (mousedev->old_x[(mousedev->pkt_count - (i)) & 03])
#define fy(i) (mousedev->old_y[(mousedev->pkt_count - (i)) & 03])
-static void mousedev_touchpad_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_touchpad_event(struct input_dev *dev,
+ struct mousedev *mousedev,
+ unsigned int code, int value)
{
int size, tmp;
enum { FRACTION_DENOM = 128 };
switch (code) {
- case ABS_X:
- fx(0) = value;
- if (mousedev->touch && mousedev->pkt_count >= 2) {
- size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
- if (size == 0)
- size = 256 * 2;
- tmp = ((value - fx(2)) * (256 * FRACTION_DENOM)) / size;
- tmp += mousedev->frac_dx;
- mousedev->packet.dx = tmp / FRACTION_DENOM;
- mousedev->frac_dx = tmp - mousedev->packet.dx * FRACTION_DENOM;
- }
- break;
- case ABS_Y:
- fy(0) = value;
- if (mousedev->touch && mousedev->pkt_count >= 2) {
- /* use X size to keep the same scale */
- size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
- if (size == 0)
- size = 256 * 2;
- tmp = -((value - fy(2)) * (256 * FRACTION_DENOM)) / size;
- tmp += mousedev->frac_dy;
- mousedev->packet.dy = tmp / FRACTION_DENOM;
- mousedev->frac_dy = tmp - mousedev->packet.dy * FRACTION_DENOM;
- }
- break;
+ case ABS_X:
+ fx(0) = value;
+ if (mousedev->touch && mousedev->pkt_count >= 2) {
+ size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
+ if (size == 0)
+ size = 256 * 2;
+ tmp = ((value - fx(2)) * 256 * FRACTION_DENOM) / size;
+ tmp += mousedev->frac_dx;
+ mousedev->packet.dx = tmp / FRACTION_DENOM;
+ mousedev->frac_dx =
+ tmp - mousedev->packet.dx * FRACTION_DENOM;
+ }
+ break;
+
+ case ABS_Y:
+ fy(0) = value;
+ if (mousedev->touch && mousedev->pkt_count >= 2) {
+ /* use X size to keep the same scale */
+ size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
+ if (size == 0)
+ size = 256 * 2;
+ tmp = -((value - fy(2)) * 256 * FRACTION_DENOM) / size;
+ tmp += mousedev->frac_dy;
+ mousedev->packet.dy = tmp / FRACTION_DENOM;
+ mousedev->frac_dy = tmp -
+ mousedev->packet.dy * FRACTION_DENOM;
+ }
+ break;
}
}
-static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev,
+ unsigned int code, int value)
{
int size;
switch (code) {
- case ABS_X:
- size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
- if (size == 0)
- size = xres ? : 1;
- if (value > dev->absmax[ABS_X])
- value = dev->absmax[ABS_X];
- if (value < dev->absmin[ABS_X])
- value = dev->absmin[ABS_X];
- mousedev->packet.x = ((value - dev->absmin[ABS_X]) * xres) / size;
- mousedev->packet.abs_event = 1;
- break;
- case ABS_Y:
- size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y];
- if (size == 0)
- size = yres ? : 1;
- if (value > dev->absmax[ABS_Y])
- value = dev->absmax[ABS_Y];
- if (value < dev->absmin[ABS_Y])
- value = dev->absmin[ABS_Y];
- mousedev->packet.y = yres - ((value - dev->absmin[ABS_Y]) * yres) / size;
- mousedev->packet.abs_event = 1;
- break;
+ case ABS_X:
+ size = dev->absmax[ABS_X] - dev->absmin[ABS_X];
+ if (size == 0)
+ size = xres ? : 1;
+ if (value > dev->absmax[ABS_X])
+ value = dev->absmax[ABS_X];
+ if (value < dev->absmin[ABS_X])
+ value = dev->absmin[ABS_X];
+ mousedev->packet.x =
+ ((value - dev->absmin[ABS_X]) * xres) / size;
+ mousedev->packet.abs_event = 1;
+ break;
+
+ case ABS_Y:
+ size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y];
+ if (size == 0)
+ size = yres ? : 1;
+ if (value > dev->absmax[ABS_Y])
+ value = dev->absmax[ABS_Y];
+ if (value < dev->absmin[ABS_Y])
+ value = dev->absmin[ABS_Y];
+ mousedev->packet.y = yres -
+ ((value - dev->absmin[ABS_Y]) * yres) / size;
+ mousedev->packet.abs_event = 1;
+ break;
}
}
-static void mousedev_rel_event(struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_rel_event(struct mousedev *mousedev,
+ unsigned int code, int value)
{
switch (code) {
- case REL_X: mousedev->packet.dx += value; break;
- case REL_Y: mousedev->packet.dy -= value; break;
- case REL_WHEEL: mousedev->packet.dz -= value; break;
+ case REL_X:
+ mousedev->packet.dx += value;
+ break;
+
+ case REL_Y:
+ mousedev->packet.dy -= value;
+ break;
+
+ case REL_WHEEL:
+ mousedev->packet.dz -= value;
+ break;
}
}
-static void mousedev_key_event(struct mousedev *mousedev, unsigned int code, int value)
+static void mousedev_key_event(struct mousedev *mousedev,
+ unsigned int code, int value)
{
int index;
switch (code) {
- case BTN_TOUCH:
- case BTN_0:
- case BTN_LEFT: index = 0; break;
- case BTN_STYLUS:
- case BTN_1:
- case BTN_RIGHT: index = 1; break;
- case BTN_2:
- case BTN_FORWARD:
- case BTN_STYLUS2:
- case BTN_MIDDLE: index = 2; break;
- case BTN_3:
- case BTN_BACK:
- case BTN_SIDE: index = 3; break;
- case BTN_4:
- case BTN_EXTRA: index = 4; break;
- default: return;
+
+ case BTN_TOUCH:
+ case BTN_0:
+ case BTN_LEFT: index = 0; break;
+
+ case BTN_STYLUS:
+ case BTN_1:
+ case BTN_RIGHT: index = 1; break;
+
+ case BTN_2:
+ case BTN_FORWARD:
+ case BTN_STYLUS2:
+ case BTN_MIDDLE: index = 2; break;
+
+ case BTN_3:
+ case BTN_BACK:
+ case BTN_SIDE: index = 3; break;
+
+ case BTN_4:
+ case BTN_EXTRA: index = 4; break;
+
+ default: return;
}
if (value) {
@@ -226,19 +257,23 @@
}
}
-static void mousedev_notify_readers(struct mousedev *mousedev, struct mousedev_hw_data *packet)
+static void mousedev_notify_readers(struct mousedev *mousedev,
+ struct mousedev_hw_data *packet)
{
struct mousedev_client *client;
struct mousedev_motion *p;
- unsigned long flags;
+ unsigned int new_head;
int wake_readers = 0;
- list_for_each_entry(client, &mousedev->client_list, node) {
- spin_lock_irqsave(&client->packet_lock, flags);
+ rcu_read_lock();
+ list_for_each_entry_rcu(client, &mousedev->client_list, node) {
+
+ /* Just acquire the lock, interrupts already disabled */
+ spin_lock(&client->packet_lock);
p = &client->packets[client->head];
if (client->ready && p->buttons != mousedev->packet.buttons) {
- unsigned int new_head = (client->head + 1) % PACKET_QUEUE_LEN;
+ new_head = (client->head + 1) % PACKET_QUEUE_LEN;
if (new_head != client->tail) {
p = &client->packets[client->head = new_head];
memset(p, 0, sizeof(struct mousedev_motion));
@@ -253,25 +288,29 @@
}
client->pos_x += packet->dx;
- client->pos_x = client->pos_x < 0 ? 0 : (client->pos_x >= xres ? xres : client->pos_x);
+ client->pos_x = client->pos_x < 0 ?
+ 0 : (client->pos_x >= xres ? xres : client->pos_x);
client->pos_y += packet->dy;
- client->pos_y = client->pos_y < 0 ? 0 : (client->pos_y >= yres ? yres : client->pos_y);
+ client->pos_y = client->pos_y < 0 ?
+ 0 : (client->pos_y >= yres ? yres : client->pos_y);
p->dx += packet->dx;
p->dy += packet->dy;
p->dz += packet->dz;
p->buttons = mousedev->packet.buttons;
- if (p->dx || p->dy || p->dz || p->buttons != client->last_buttons)
+ if (p->dx || p->dy || p->dz ||
+ p->buttons != client->last_buttons)
client->ready = 1;
- spin_unlock_irqrestore(&client->packet_lock, flags);
+ spin_unlock(&client->packet_lock);
if (client->ready) {
kill_fasync(&client->fasync, SIGIO, POLL_IN);
wake_readers = 1;
}
}
+ rcu_read_unlock();
if (wake_readers)
wake_up_interruptible(&mousedev->wait);
@@ -281,7 +320,8 @@
{
if (!value) {
if (mousedev->touch &&
- time_before(jiffies, mousedev->touch + msecs_to_jiffies(tap_time))) {
+ time_before(jiffies,
+ mousedev->touch + msecs_to_jiffies(tap_time))) {
/*
* Toggle left button to emulate tap.
* We rely on the fact that mousedev_mix always has 0
@@ -290,7 +330,8 @@
set_bit(0, &mousedev->packet.buttons);
set_bit(0, &mousedev_mix->packet.buttons);
mousedev_notify_readers(mousedev, &mousedev_mix->packet);
- mousedev_notify_readers(mousedev_mix, &mousedev_mix->packet);
+ mousedev_notify_readers(mousedev_mix,
+ &mousedev_mix->packet);
clear_bit(0, &mousedev->packet.buttons);
clear_bit(0, &mousedev_mix->packet.buttons);
}
@@ -302,54 +343,61 @@
mousedev->touch = jiffies;
}
-static void mousedev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value)
+static void mousedev_event(struct input_handle *handle,
+ unsigned int type, unsigned int code, int value)
{
struct mousedev *mousedev = handle->private;
switch (type) {
- case EV_ABS:
- /* Ignore joysticks */
- if (test_bit(BTN_TRIGGER, handle->dev->keybit))
- return;
- if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
- mousedev_touchpad_event(handle->dev, mousedev, code, value);
+ case EV_ABS:
+ /* Ignore joysticks */
+ if (test_bit(BTN_TRIGGER, handle->dev->keybit))
+ return;
+
+ if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
+ mousedev_touchpad_event(handle->dev,
+ mousedev, code, value);
+ else
+ mousedev_abs_event(handle->dev, mousedev, code, value);
+
+ break;
+
+ case EV_REL:
+ mousedev_rel_event(mousedev, code, value);
+ break;
+
+ case EV_KEY:
+ if (value != 2) {
+ if (code == BTN_TOUCH &&
+ test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
+ mousedev_touchpad_touch(mousedev, value);
else
- mousedev_abs_event(handle->dev, mousedev, code, value);
+ mousedev_key_event(mousedev, code, value);
+ }
+ break;
- break;
-
- case EV_REL:
- mousedev_rel_event(mousedev, code, value);
- break;
-
- case EV_KEY:
- if (value != 2) {
- if (code == BTN_TOUCH && test_bit(BTN_TOOL_FINGER, handle->dev->keybit))
- mousedev_touchpad_touch(mousedev, value);
- else
- mousedev_key_event(mousedev, code, value);
+ case EV_SYN:
+ if (code == SYN_REPORT) {
+ if (mousedev->touch) {
+ mousedev->pkt_count++;
+ /*
+ * Input system eats duplicate events,
+ * but we need all of them to do correct
+ * averaging so apply present one forward
+ */
+ fx(0) = fx(1);
+ fy(0) = fy(1);
}
- break;
- case EV_SYN:
- if (code == SYN_REPORT) {
- if (mousedev->touch) {
- mousedev->pkt_count++;
- /* Input system eats duplicate events, but we need all of them
- * to do correct averaging so apply present one forward
- */
- fx(0) = fx(1);
- fy(0) = fy(1);
- }
+ mousedev_notify_readers(mousedev, &mousedev->packet);
+ mousedev_notify_readers(mousedev_mix, &mousedev->packet);
- mousedev_notify_readers(mousedev, &mousedev->packet);
- mousedev_notify_readers(mousedev_mix, &mousedev->packet);
-
- mousedev->packet.dx = mousedev->packet.dy = mousedev->packet.dz = 0;
- mousedev->packet.abs_event = 0;
- }
- break;
+ mousedev->packet.dx = mousedev->packet.dy =
+ mousedev->packet.dz = 0;
+ mousedev->packet.abs_event = 0;
+ }
+ break;
}
}
@@ -367,41 +415,48 @@
{
struct mousedev *mousedev = container_of(dev, struct mousedev, dev);
- mousedev_table[mousedev->minor] = NULL;
kfree(mousedev);
}
-static int mixdev_add_device(struct mousedev *mousedev)
+static int mousedev_open_device(struct mousedev *mousedev)
{
- int error;
+ int retval;
- if (mousedev_mix->open) {
- error = input_open_device(&mousedev->handle);
- if (error)
- return error;
+ retval = mutex_lock_interruptible(&mousedev->mutex);
+ if (retval)
+ return retval;
- mousedev->open++;
- mousedev->mixdev_open = 1;
+ if (mousedev->minor == MOUSEDEV_MIX)
+ mixdev_open_devices();
+ else if (!mousedev->exist)
+ retval = -ENODEV;
+ else if (!mousedev->open++) {
+ retval = input_open_device(&mousedev->handle);
+ if (retval)
+ mousedev->open--;
}
- get_device(&mousedev->dev);
- list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list);
-
- return 0;
+ mutex_unlock(&mousedev->mutex);
+ return retval;
}
-static void mixdev_remove_device(struct mousedev *mousedev)
+static void mousedev_close_device(struct mousedev *mousedev)
{
- if (mousedev->mixdev_open) {
- mousedev->mixdev_open = 0;
- if (!--mousedev->open && mousedev->exist)
- input_close_device(&mousedev->handle);
- }
+ mutex_lock(&mousedev->mutex);
- list_del_init(&mousedev->mixdev_node);
- put_device(&mousedev->dev);
+ if (mousedev->minor == MOUSEDEV_MIX)
+ mixdev_close_devices();
+ else if (mousedev->exist && !--mousedev->open)
+ input_close_device(&mousedev->handle);
+
+ mutex_unlock(&mousedev->mutex);
}
+/*
+ * Open all available devices so they can all be multiplexed in one.
+ * stream. Note that this function is called with mousedev_mix->mutex
+ * held.
+ */
static void mixdev_open_devices(void)
{
struct mousedev *mousedev;
@@ -411,16 +466,19 @@
list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) {
if (!mousedev->mixdev_open) {
- if (!mousedev->open && mousedev->exist)
- if (input_open_device(&mousedev->handle))
- continue;
+ if (mousedev_open_device(mousedev))
+ continue;
- mousedev->open++;
mousedev->mixdev_open = 1;
}
}
}
+/*
+ * Close all devices that were opened as part of multiplexed
+ * device. Note that this function is called with mousedev_mix->mutex
+ * held.
+ */
static void mixdev_close_devices(void)
{
struct mousedev *mousedev;
@@ -431,33 +489,45 @@
list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) {
if (mousedev->mixdev_open) {
mousedev->mixdev_open = 0;
- if (!--mousedev->open && mousedev->exist)
- input_close_device(&mousedev->handle);
+ mousedev_close_device(mousedev);
}
}
}
+
+static void mousedev_attach_client(struct mousedev *mousedev,
+ struct mousedev_client *client)
+{
+ spin_lock(&mousedev->client_lock);
+ list_add_tail_rcu(&client->node, &mousedev->client_list);
+ spin_unlock(&mousedev->client_lock);
+ synchronize_rcu();
+}
+
+static void mousedev_detach_client(struct mousedev *mousedev,
+ struct mousedev_client *client)
+{
+ spin_lock(&mousedev->client_lock);
+ list_del_rcu(&client->node);
+ spin_unlock(&mousedev->client_lock);
+ synchronize_rcu();
+}
+
static int mousedev_release(struct inode *inode, struct file *file)
{
struct mousedev_client *client = file->private_data;
struct mousedev *mousedev = client->mousedev;
mousedev_fasync(-1, file, 0);
-
- list_del(&client->node);
+ mousedev_detach_client(mousedev, client);
kfree(client);
- if (mousedev->minor == MOUSEDEV_MIX)
- mixdev_close_devices();
- else if (!--mousedev->open && mousedev->exist)
- input_close_device(&mousedev->handle);
-
+ mousedev_close_device(mousedev);
put_device(&mousedev->dev);
return 0;
}
-
static int mousedev_open(struct inode *inode, struct file *file)
{
struct mousedev_client *client;
@@ -475,12 +545,17 @@
if (i >= MOUSEDEV_MINORS)
return -ENODEV;
+ error = mutex_lock_interruptible(&mousedev_table_mutex);
+ if (error)
+ return error;
mousedev = mousedev_table[i];
+ if (mousedev)
+ get_device(&mousedev->dev);
+ mutex_unlock(&mousedev_table_mutex);
+
if (!mousedev)
return -ENODEV;
- get_device(&mousedev->dev);
-
client = kzalloc(sizeof(struct mousedev_client), GFP_KERNEL);
if (!client) {
error = -ENOMEM;
@@ -491,21 +566,17 @@
client->pos_x = xres / 2;
client->pos_y = yres / 2;
client->mousedev = mousedev;
- list_add_tail(&client->node, &mousedev->client_list);
+ mousedev_attach_client(mousedev, client);
- if (mousedev->minor == MOUSEDEV_MIX)
- mixdev_open_devices();
- else if (!mousedev->open++ && mousedev->exist) {
- error = input_open_device(&mousedev->handle);
- if (error)
- goto err_free_client;
- }
+ error = mousedev_open_device(mousedev);
+ if (error)
+ goto err_free_client;
file->private_data = client;
return 0;
err_free_client:
- list_del(&client->node);
+ mousedev_detach_client(mousedev, client);
kfree(client);
err_put_mousedev:
put_device(&mousedev->dev);
@@ -517,41 +588,41 @@
return delta > limit ? limit : (delta < -limit ? -limit : delta);
}
-static void mousedev_packet(struct mousedev_client *client, signed char *ps2_data)
+static void mousedev_packet(struct mousedev_client *client,
+ signed char *ps2_data)
{
- struct mousedev_motion *p;
- unsigned long flags;
+ struct mousedev_motion *p = &client->packets[client->tail];
- spin_lock_irqsave(&client->packet_lock, flags);
- p = &client->packets[client->tail];
-
- ps2_data[0] = 0x08 | ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07);
+ ps2_data[0] = 0x08 |
+ ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07);
ps2_data[1] = mousedev_limit_delta(p->dx, 127);
ps2_data[2] = mousedev_limit_delta(p->dy, 127);
p->dx -= ps2_data[1];
p->dy -= ps2_data[2];
switch (client->mode) {
- case MOUSEDEV_EMUL_EXPS:
- ps2_data[3] = mousedev_limit_delta(p->dz, 7);
- p->dz -= ps2_data[3];
- ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1);
- client->bufsiz = 4;
- break;
+ case MOUSEDEV_EMUL_EXPS:
+ ps2_data[3] = mousedev_limit_delta(p->dz, 7);
+ p->dz -= ps2_data[3];
+ ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1);
+ client->bufsiz = 4;
+ break;
- case MOUSEDEV_EMUL_IMPS:
- ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
- ps2_data[3] = mousedev_limit_delta(p->dz, 127);
- p->dz -= ps2_data[3];
- client->bufsiz = 4;
- break;
+ case MOUSEDEV_EMUL_IMPS:
+ ps2_data[0] |=
+ ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
+ ps2_data[3] = mousedev_limit_delta(p->dz, 127);
+ p->dz -= ps2_data[3];
+ client->bufsiz = 4;
+ break;
- case MOUSEDEV_EMUL_PS2:
- default:
- ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
- p->dz = 0;
- client->bufsiz = 3;
- break;
+ case MOUSEDEV_EMUL_PS2:
+ default:
+ ps2_data[0] |=
+ ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1);
+ p->dz = 0;
+ client->bufsiz = 3;
+ break;
}
if (!p->dx && !p->dy && !p->dz) {
@@ -561,12 +632,56 @@
} else
client->tail = (client->tail + 1) % PACKET_QUEUE_LEN;
}
-
- spin_unlock_irqrestore(&client->packet_lock, flags);
}
+static void mousedev_generate_response(struct mousedev_client *client,
+ int command)
+{
+ client->ps2[0] = 0xfa; /* ACK */
-static ssize_t mousedev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+ switch (command) {
+
+ case 0xeb: /* Poll */
+ mousedev_packet(client, &client->ps2[1]);
+ client->bufsiz++; /* account for leading ACK */
+ break;
+
+ case 0xf2: /* Get ID */
+ switch (client->mode) {
+ case MOUSEDEV_EMUL_PS2:
+ client->ps2[1] = 0;
+ break;
+ case MOUSEDEV_EMUL_IMPS:
+ client->ps2[1] = 3;
+ break;
+ case MOUSEDEV_EMUL_EXPS:
+ client->ps2[1] = 4;
+ break;
+ }
+ client->bufsiz = 2;
+ break;
+
+ case 0xe9: /* Get info */
+ client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200;
+ client->bufsiz = 4;
+ break;
+
+ case 0xff: /* Reset */
+ client->impsseq = client->imexseq = 0;
+ client->mode = MOUSEDEV_EMUL_PS2;
+ client->ps2[1] = 0xaa; client->ps2[2] = 0x00;
+ client->bufsiz = 3;
+ break;
+
+ default:
+ client->bufsiz = 1;
+ break;
+ }
+ client->buffer = client->bufsiz;
+}
+
+static ssize_t mousedev_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
struct mousedev_client *client = file->private_data;
unsigned char c;
@@ -577,6 +692,8 @@
if (get_user(c, buffer + i))
return -EFAULT;
+ spin_lock_irq(&client->packet_lock);
+
if (c == mousedev_imex_seq[client->imexseq]) {
if (++client->imexseq == MOUSEDEV_SEQ_LEN) {
client->imexseq = 0;
@@ -593,68 +710,39 @@
} else
client->impsseq = 0;
- client->ps2[0] = 0xfa;
+ mousedev_generate_response(client, c);
- switch (c) {
-
- case 0xeb: /* Poll */
- mousedev_packet(client, &client->ps2[1]);
- client->bufsiz++; /* account for leading ACK */
- break;
-
- case 0xf2: /* Get ID */
- switch (client->mode) {
- case MOUSEDEV_EMUL_PS2: client->ps2[1] = 0; break;
- case MOUSEDEV_EMUL_IMPS: client->ps2[1] = 3; break;
- case MOUSEDEV_EMUL_EXPS: client->ps2[1] = 4; break;
- }
- client->bufsiz = 2;
- break;
-
- case 0xe9: /* Get info */
- client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200;
- client->bufsiz = 4;
- break;
-
- case 0xff: /* Reset */
- client->impsseq = client->imexseq = 0;
- client->mode = MOUSEDEV_EMUL_PS2;
- client->ps2[1] = 0xaa; client->ps2[2] = 0x00;
- client->bufsiz = 3;
- break;
-
- default:
- client->bufsiz = 1;
- break;
- }
-
- client->buffer = client->bufsiz;
+ spin_unlock_irq(&client->packet_lock);
}
kill_fasync(&client->fasync, SIGIO, POLL_IN);
-
wake_up_interruptible(&client->mousedev->wait);
return count;
}
-static ssize_t mousedev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+static ssize_t mousedev_read(struct file *file, char __user *buffer,
+ size_t count, loff_t *ppos)
{
struct mousedev_client *client = file->private_data;
+ struct mousedev *mousedev = client->mousedev;
+ signed char data[sizeof(client->ps2)];
int retval = 0;
- if (!client->ready && !client->buffer && (file->f_flags & O_NONBLOCK))
+ if (!client->ready && !client->buffer && mousedev->exist &&
+ (file->f_flags & O_NONBLOCK))
return -EAGAIN;
- retval = wait_event_interruptible(client->mousedev->wait,
- !client->mousedev->exist || client->ready || client->buffer);
-
+ retval = wait_event_interruptible(mousedev->wait,
+ !mousedev->exist || client->ready || client->buffer);
if (retval)
return retval;
- if (!client->mousedev->exist)
+ if (!mousedev->exist)
return -ENODEV;
+ spin_lock_irq(&client->packet_lock);
+
if (!client->buffer && client->ready) {
mousedev_packet(client, client->ps2);
client->buffer = client->bufsiz;
@@ -663,9 +751,12 @@
if (count > client->buffer)
count = client->buffer;
+ memcpy(data, client->ps2 + client->bufsiz - client->buffer, count);
client->buffer -= count;
- if (copy_to_user(buffer, client->ps2 + client->bufsiz - client->buffer - count, count))
+ spin_unlock_irq(&client->packet_lock);
+
+ if (copy_to_user(buffer, data, count))
return -EFAULT;
return count;
@@ -692,6 +783,60 @@
.fasync = mousedev_fasync,
};
+static int mousedev_install_chrdev(struct mousedev *mousedev)
+{
+ mousedev_table[mousedev->minor] = mousedev;
+ return 0;
+}
+
+static void mousedev_remove_chrdev(struct mousedev *mousedev)
+{
+ mutex_lock(&mousedev_table_mutex);
+ mousedev_table[mousedev->minor] = NULL;
+ mutex_unlock(&mousedev_table_mutex);
+}
+
+/*
+ * Mark device non-existent. This disables writes, ioctls and
+ * prevents new users from opening the device. Already posted
+ * blocking reads will stay, however new ones will fail.
+ */
+static void mousedev_mark_dead(struct mousedev *mousedev)
+{
+ mutex_lock(&mousedev->mutex);
+ mousedev->exist = 0;
+ mutex_unlock(&mousedev->mutex);
+}
+
+/*
+ * Wake up users waiting for IO so they can disconnect from
+ * dead device.
+ */
+static void mousedev_hangup(struct mousedev *mousedev)
+{
+ struct mousedev_client *client;
+
+ spin_lock(&mousedev->client_lock);
+ list_for_each_entry(client, &mousedev->client_list, node)
+ kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+ spin_unlock(&mousedev->client_lock);
+
+ wake_up_interruptible(&mousedev->wait);
+}
+
+static void mousedev_cleanup(struct mousedev *mousedev)
+{
+ struct input_handle *handle = &mousedev->handle;
+
+ mousedev_mark_dead(mousedev);
+ mousedev_hangup(mousedev);
+ mousedev_remove_chrdev(mousedev);
+
+ /* mousedev is marked dead so no one else accesses mousedev->open */
+ if (mousedev->open)
+ input_close_device(handle);
+}
+
static struct mousedev *mousedev_create(struct input_dev *dev,
struct input_handler *handler,
int minor)
@@ -707,6 +852,10 @@
INIT_LIST_HEAD(&mousedev->client_list);
INIT_LIST_HEAD(&mousedev->mixdev_node);
+ spin_lock_init(&mousedev->client_lock);
+ mutex_init(&mousedev->mutex);
+ lockdep_set_subclass(&mousedev->mutex,
+ minor == MOUSEDEV_MIX ? MOUSEDEV_MIX : 0);
init_waitqueue_head(&mousedev->wait);
if (minor == MOUSEDEV_MIX)
@@ -731,14 +880,27 @@
mousedev->dev.release = mousedev_free;
device_initialize(&mousedev->dev);
- mousedev_table[minor] = mousedev;
+ if (minor != MOUSEDEV_MIX) {
+ error = input_register_handle(&mousedev->handle);
+ if (error)
+ goto err_free_mousedev;
+ }
+
+ error = mousedev_install_chrdev(mousedev);
+ if (error)
+ goto err_unregister_handle;
error = device_add(&mousedev->dev);
if (error)
- goto err_free_mousedev;
+ goto err_cleanup_mousedev;
return mousedev;
+ err_cleanup_mousedev:
+ mousedev_cleanup(mousedev);
+ err_unregister_handle:
+ if (minor != MOUSEDEV_MIX)
+ input_unregister_handle(&mousedev->handle);
err_free_mousedev:
put_device(&mousedev->dev);
err_out:
@@ -747,29 +909,64 @@
static void mousedev_destroy(struct mousedev *mousedev)
{
- struct mousedev_client *client;
-
device_del(&mousedev->dev);
- mousedev->exist = 0;
+ mousedev_cleanup(mousedev);
+ if (mousedev->minor != MOUSEDEV_MIX)
+ input_unregister_handle(&mousedev->handle);
+ put_device(&mousedev->dev);
+}
- if (mousedev->open) {
- input_close_device(&mousedev->handle);
- list_for_each_entry(client, &mousedev->client_list, node)
- kill_fasync(&client->fasync, SIGIO, POLL_HUP);
- wake_up_interruptible(&mousedev->wait);
+static int mixdev_add_device(struct mousedev *mousedev)
+{
+ int retval;
+
+ retval = mutex_lock_interruptible(&mousedev_mix->mutex);
+ if (retval)
+ return retval;
+
+ if (mousedev_mix->open) {
+ retval = mousedev_open_device(mousedev);
+ if (retval)
+ goto out;
+
+ mousedev->mixdev_open = 1;
}
+ get_device(&mousedev->dev);
+ list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list);
+
+ out:
+ mutex_unlock(&mousedev_mix->mutex);
+ return retval;
+}
+
+static void mixdev_remove_device(struct mousedev *mousedev)
+{
+ mutex_lock(&mousedev_mix->mutex);
+
+ if (mousedev->mixdev_open) {
+ mousedev->mixdev_open = 0;
+ mousedev_close_device(mousedev);
+ }
+
+ list_del_init(&mousedev->mixdev_node);
+ mutex_unlock(&mousedev_mix->mutex);
+
put_device(&mousedev->dev);
}
-static int mousedev_connect(struct input_handler *handler, struct input_dev *dev,
+static int mousedev_connect(struct input_handler *handler,
+ struct input_dev *dev,
const struct input_device_id *id)
{
struct mousedev *mousedev;
int minor;
int error;
- for (minor = 0; minor < MOUSEDEV_MINORS && mousedev_table[minor]; minor++);
+ for (minor = 0; minor < MOUSEDEV_MINORS; minor++)
+ if (!mousedev_table[minor])
+ break;
+
if (minor == MOUSEDEV_MINORS) {
printk(KERN_ERR "mousedev: no more free mousedev devices\n");
return -ENFILE;
@@ -779,21 +976,13 @@
if (IS_ERR(mousedev))
return PTR_ERR(mousedev);
- error = input_register_handle(&mousedev->handle);
- if (error)
- goto err_delete_mousedev;
-
error = mixdev_add_device(mousedev);
- if (error)
- goto err_unregister_handle;
+ if (error) {
+ mousedev_destroy(mousedev);
+ return error;
+ }
return 0;
-
- err_unregister_handle:
- input_unregister_handle(&mousedev->handle);
- err_delete_mousedev:
- device_unregister(&mousedev->dev);
- return error;
}
static void mousedev_disconnect(struct input_handle *handle)
@@ -801,33 +990,42 @@
struct mousedev *mousedev = handle->private;
mixdev_remove_device(mousedev);
- input_unregister_handle(handle);
mousedev_destroy(mousedev);
}
static const struct input_device_id mousedev_ids[] = {
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_KEYBIT |
+ INPUT_DEVICE_ID_MATCH_RELBIT,
.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
.keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
.relbit = { BIT(REL_X) | BIT(REL_Y) },
- }, /* A mouse like device, at least one button, two relative axes */
+ }, /* A mouse like device, at least one button,
+ two relative axes */
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_RELBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_RELBIT,
.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
.relbit = { BIT(REL_WHEEL) },
}, /* A separate scrollwheel */
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_KEYBIT |
+ INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
.absbit = { BIT(ABS_X) | BIT(ABS_Y) },
- }, /* A tablet like device, at least touch detection, two absolute axes */
+ }, /* A tablet like device, at least touch detection,
+ two absolute axes */
{
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+ INPUT_DEVICE_ID_MATCH_KEYBIT |
+ INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
.keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) },
- .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | BIT(ABS_TOOL_WIDTH) },
+ .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) |
+ BIT(ABS_TOOL_WIDTH) },
}, /* A touchpad */
{ }, /* Terminating entry */
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index c2eea27..11dafc0 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -385,6 +385,8 @@
i8042_ctr |= I8042_CTR_KBDINT;
if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
+ i8042_ctr &= ~I8042_CTR_KBDINT;
+ i8042_ctr |= I8042_CTR_KBDDIS;
printk(KERN_ERR "i8042.c: Failed to enable KBD port.\n");
return -EIO;
}
@@ -402,6 +404,8 @@
i8042_ctr |= I8042_CTR_AUXINT;
if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
+ i8042_ctr &= ~I8042_CTR_AUXINT;
+ i8042_ctr |= I8042_CTR_AUXDIS;
printk(KERN_ERR "i8042.c: Failed to enable AUX port.\n");
return -EIO;
}
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index f929fcd..e3e0baa 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -126,6 +126,16 @@
To compile this driver as a module, choose M here: the
module will be called hp680_ts_input.
+config TOUCHSCREEN_HP7XX
+ tristate "HP Jornada 710/720/728 touchscreen"
+ depends on SA1100_JORNADA720_SSP
+ help
+ Say Y here if you have a HP Jornada 710/720/728 and want
+ to support the built-in touchscreen.
+
+ To compile this driver as a module, choose M here: the
+ module will be called jornada720_ts.
+
config TOUCHSCREEN_PENMOUNT
tristate "Penmount serial touchscreen"
select SERIO
@@ -191,6 +201,7 @@
- Gunze AHL61
- DMC TSC-10/25
- IRTOUCHSYSTEMS/UNITOP
+ - IdealTEK URTC1000
Have a look at <http://linux.chapter7.ch/touchkit/> for
a usage description and the required user-space stuff.
@@ -238,4 +249,14 @@
bool "IRTOUCHSYSTEMS/UNITOP device support" if EMBEDDED
depends on TOUCHSCREEN_USB_COMPOSITE
+config TOUCHSCREEN_USB_IDEALTEK
+ default y
+ bool "IdealTEK URTC1000 device support" if EMBEDDED
+ depends on TOUCHSCREEN_USB_COMPOSITE
+
+config TOUCHSCREEN_USB_GENERAL_TOUCH
+ default y
+ bool "GeneralTouch Touchscreen device support" if EMBEDDED
+ depends on TOUCHSCREEN_USB_COMPOSITE
+
endif
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 5de8933..35d4097 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -13,6 +13,7 @@
obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o
obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o
obj-$(CONFIG_TOUCHSCREEN_HP600) += hp680_ts_input.o
+obj-$(CONFIG_TOUCHSCREEN_HP7XX) += jornada720_ts.o
obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE) += usbtouchscreen.o
obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o
obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT) += touchright.o
diff --git a/drivers/input/touchscreen/jornada720_ts.c b/drivers/input/touchscreen/jornada720_ts.c
new file mode 100644
index 0000000..42a1c9a
--- /dev/null
+++ b/drivers/input/touchscreen/jornada720_ts.c
@@ -0,0 +1,182 @@
+/*
+ * drivers/input/touchscreen/jornada720_ts.c
+ *
+ * Copyright (C) 2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
+ *
+ * Copyright (C) 2006 Filip Zyzniewski <filip.zyzniewski@tefnet.pl>
+ * based on HP Jornada 56x touchscreen driver by Alex Lange <chicken@handhelds.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * HP Jornada 710/720/729 Touchscreen Driver
+ */
+
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/hardware.h>
+#include <asm/arch/jornada720.h>
+
+MODULE_AUTHOR("Kristoffer Ericson <kristoffer.ericson@gmail.com>");
+MODULE_DESCRIPTION("HP Jornada 710/720/728 touchscreen driver");
+MODULE_LICENSE("GPLv2");
+
+struct jornada_ts {
+ struct input_dev *dev;
+ int x_data[4]; /* X sample values */
+ int y_data[4]; /* Y sample values */
+};
+
+static void jornada720_ts_collect_data(struct jornada_ts *jornada_ts)
+{
+
+ /* 3 low word X samples */
+ jornada_ts->x_data[0] = jornada_ssp_byte(TXDUMMY);
+ jornada_ts->x_data[1] = jornada_ssp_byte(TXDUMMY);
+ jornada_ts->x_data[2] = jornada_ssp_byte(TXDUMMY);
+
+ /* 3 low word Y samples */
+ jornada_ts->y_data[0] = jornada_ssp_byte(TXDUMMY);
+ jornada_ts->y_data[1] = jornada_ssp_byte(TXDUMMY);
+ jornada_ts->y_data[2] = jornada_ssp_byte(TXDUMMY);
+
+ /* combined x samples bits */
+ jornada_ts->x_data[3] = jornada_ssp_byte(TXDUMMY);
+
+ /* combined y samples bits */
+ jornada_ts->y_data[3] = jornada_ssp_byte(TXDUMMY);
+}
+
+static int jornada720_ts_average(int coords[4])
+{
+ int coord, high_bits = coords[3];
+
+ coord = coords[0] | ((high_bits & 0x03) << 8);
+ coord += coords[1] | ((high_bits & 0x0c) << 6);
+ coord += coords[2] | ((high_bits & 0x30) << 4);
+
+ return coord / 3;
+}
+
+static irqreturn_t jornada720_ts_interrupt(int irq, void *dev_id)
+{
+ struct platform_device *pdev = dev_id;
+ struct jornada_ts *jornada_ts = platform_get_drvdata(pdev);
+ struct input_dev *input = jornada_ts->dev;
+ int x, y;
+
+ /* If GPIO_GPIO9 is set to high then report pen up */
+ if (GPLR & GPIO_GPIO(9)) {
+ input_report_key(input, BTN_TOUCH, 0);
+ input_sync(input);
+ } else {
+ jornada_ssp_start();
+
+ /* proper reply to request is always TXDUMMY */
+ if (jornada_ssp_inout(GETTOUCHSAMPLES) == TXDUMMY) {
+ jornada720_ts_collect_data(jornada_ts);
+
+ x = jornada720_ts_average(jornada_ts->x_data);
+ y = jornada720_ts_average(jornada_ts->y_data);
+
+ input_report_key(input, BTN_TOUCH, 1);
+ input_report_abs(input, ABS_X, x);
+ input_report_abs(input, ABS_Y, y);
+ input_sync(input);
+ }
+
+ jornada_ssp_end();
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int __devinit jornada720_ts_probe(struct platform_device *pdev)
+{
+ struct jornada_ts *jornada_ts;
+ struct input_dev *input_dev;
+ int error;
+
+ jornada_ts = kzalloc(sizeof(struct jornada_ts), GFP_KERNEL);
+ input_dev = input_allocate_device();
+
+ if (!jornada_ts || !input_dev) {
+ error = -ENOMEM;
+ goto fail1;
+ }
+
+ platform_set_drvdata(pdev, jornada_ts);
+
+ jornada_ts->dev = input_dev;
+
+ input_dev->name = "HP Jornada 7xx Touchscreen";
+ input_dev->phys = "jornadats/input0";
+ input_dev->id.bustype = BUS_HOST;
+ input_dev->dev.parent = &pdev->dev;
+
+ input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+ input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+ input_set_abs_params(input_dev, ABS_X, 270, 3900, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 180, 3700, 0, 0);
+
+ error = request_irq(IRQ_GPIO9,
+ jornada720_ts_interrupt,
+ IRQF_DISABLED | IRQF_TRIGGER_RISING,
+ "HP7XX Touchscreen driver", pdev);
+ if (error) {
+ printk(KERN_INFO "HP7XX TS : Unable to acquire irq!\n");
+ goto fail1;
+ }
+
+ error = input_register_device(jornada_ts->dev);
+ if (error)
+ goto fail2;
+
+ return 0;
+
+ fail2:
+ free_irq(IRQ_GPIO9, pdev);
+ fail1:
+ platform_set_drvdata(pdev, NULL);
+ input_free_device(input_dev);
+ kfree(jornada_ts);
+ return error;
+}
+
+static int __devexit jornada720_ts_remove(struct platform_device *pdev)
+{
+ struct jornada_ts *jornada_ts = platform_get_drvdata(pdev);
+
+ free_irq(IRQ_GPIO9, pdev);
+ platform_set_drvdata(pdev, NULL);
+ input_unregister_device(jornada_ts->dev);
+ kfree(jornada_ts);
+
+ return 0;
+}
+
+static struct platform_driver jornada720_ts_driver = {
+ .probe = jornada720_ts_probe,
+ .remove = __devexit_p(jornada720_ts_remove),
+ .driver = {
+ .name = "jornada_ts",
+ },
+};
+
+static int __init jornada720_ts_init(void)
+{
+ return platform_driver_register(&jornada720_ts_driver);
+}
+
+static void __exit jornada720_ts_exit(void)
+{
+ platform_driver_unregister(&jornada720_ts_driver);
+}
+
+module_init(jornada720_ts_init);
+module_exit(jornada720_ts_exit);
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 36f9440..86aed64 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -130,8 +130,7 @@
if (val & UCB_ADC_DAT_VALID)
break;
/* yield to other processes */
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
}
return UCB_ADC_DAT_VALUE(val);
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 741f6c6..9fb3d5c3 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -10,6 +10,7 @@
* - Gunze AHL61
* - DMC TSC-10/25
* - IRTOUCHSYSTEMS/UNITOP
+ * - IdealTEK URTC1000
*
* Copyright (C) 2004-2006 by Daniel Ritz <daniel.ritz@gmx.ch>
* Copyright (C) by Todd E. Johnson (mtouchusb.c)
@@ -92,7 +93,7 @@
};
-#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO)
+#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO) || defined(CONFIG_TOUCHSCREEN_USB_IDEALTEK)
#define MULTI_PACKET
#endif
@@ -112,6 +113,8 @@
DEVTYPE_GUNZE,
DEVTYPE_DMC_TSC10,
DEVTYPE_IRTOUCH,
+ DEVTYPE_IDEALTEK,
+ DEVTYPE_GENERAL_TOUCH,
};
static struct usb_device_id usbtouch_devices[] = {
@@ -157,6 +160,14 @@
{USB_DEVICE(0x6615, 0x0001), .driver_info = DEVTYPE_IRTOUCH},
#endif
+#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
+ {USB_DEVICE(0x1391, 0x1000), .driver_info = DEVTYPE_IDEALTEK},
+#endif
+
+#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
+ {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH},
+#endif
+
{}
};
@@ -396,7 +407,8 @@
TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT);
if (ret < 0)
return ret;
- if (buf[0] != 0x06 || buf[1] != 0x00)
+ if ((buf[0] != 0x06 || buf[1] != 0x00) &&
+ (buf[0] != 0x15 || buf[1] != 0x01))
return -ENODEV;
/* start sending data */
@@ -438,6 +450,57 @@
/*****************************************************************************
+ * IdealTEK URTC1000 Part
+ */
+#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
+static int idealtek_get_pkt_len(unsigned char *buf, int len)
+{
+ if (buf[0] & 0x80)
+ return 5;
+ if (buf[0] == 0x01)
+ return len;
+ return 0;
+}
+
+static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
+{
+ switch (pkt[0] & 0x98) {
+ case 0x88:
+ /* touch data in IdealTEK mode */
+ dev->x = (pkt[1] << 5) | (pkt[2] >> 2);
+ dev->y = (pkt[3] << 5) | (pkt[4] >> 2);
+ dev->touch = (pkt[0] & 0x40) ? 1 : 0;
+ return 1;
+
+ case 0x98:
+ /* touch data in MT emulation mode */
+ dev->x = (pkt[2] << 5) | (pkt[1] >> 2);
+ dev->y = (pkt[4] << 5) | (pkt[3] >> 2);
+ dev->touch = (pkt[0] & 0x40) ? 1 : 0;
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+#endif
+
+/*****************************************************************************
+ * General Touch Part
+ */
+#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
+static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
+{
+ dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1] ;
+ dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3] ;
+ dev->press = pkt[5] & 0xff;
+ dev->touch = pkt[0] & 0x01;
+
+ return 1;
+}
+#endif
+
+/*****************************************************************************
* the different device descriptors
*/
static struct usbtouch_device_info usbtouch_dev_info[] = {
@@ -537,6 +600,32 @@
.read_data = irtouch_read_data,
},
#endif
+
+#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK
+ [DEVTYPE_IDEALTEK] = {
+ .min_xc = 0x0,
+ .max_xc = 0x0fff,
+ .min_yc = 0x0,
+ .max_yc = 0x0fff,
+ .rept_size = 8,
+ .flags = USBTOUCH_FLG_BUFFER,
+ .process_pkt = usbtouch_process_multi,
+ .get_pkt_len = idealtek_get_pkt_len,
+ .read_data = idealtek_read_data,
+ },
+#endif
+
+#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
+ [DEVTYPE_GENERAL_TOUCH] = {
+ .min_xc = 0x0,
+ .max_xc = 0x0500,
+ .min_yc = 0x0,
+ .max_yc = 0x0500,
+ .rept_size = 7,
+ .read_data = general_touch_read_data,
+ }
+#endif
+
};
diff --git a/drivers/input/tsdev.c b/drivers/input/tsdev.c
deleted file mode 100644
index d2f882e..0000000
--- a/drivers/input/tsdev.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * $Id: tsdev.c,v 1.15 2002/04/10 16:50:19 jsimmons Exp $
- *
- * Copyright (c) 2001 "Crazy" james Simmons
- *
- * Compaq touchscreen protocol driver. The protocol emulated by this driver
- * is obsolete; for new programs use the tslib library which can read directly
- * from evdev and perform dejittering, variance filtering and calibration -
- * all in user space, not at kernel level. The meaning of this driver is
- * to allow usage of newer input drivers with old applications that use the
- * old /dev/h3600_ts and /dev/h3600_tsraw devices.
- *
- * 09-Apr-2004: Andrew Zabolotny <zap@homelink.ru>
- * Fixed to actually work, not just output random numbers.
- * Added support for both h3600_ts and h3600_tsraw protocol
- * emulation.
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <jsimmons@infradead.org>.
- */
-
-#define TSDEV_MINOR_BASE 128
-#define TSDEV_MINORS 32
-/* First 16 devices are h3600_ts compatible; second 16 are h3600_tsraw */
-#define TSDEV_MINOR_MASK 15
-#define TSDEV_BUFFER_SIZE 64
-
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/input.h>
-#include <linux/major.h>
-#include <linux/random.h>
-#include <linux/time.h>
-#include <linux/device.h>
-
-#ifndef CONFIG_INPUT_TSDEV_SCREEN_X
-#define CONFIG_INPUT_TSDEV_SCREEN_X 240
-#endif
-#ifndef CONFIG_INPUT_TSDEV_SCREEN_Y
-#define CONFIG_INPUT_TSDEV_SCREEN_Y 320
-#endif
-
-/* This driver emulates both protocols of the old h3600_ts and h3600_tsraw
- * devices. The first one must output X/Y data in 'cooked' format, e.g.
- * filtered, dejittered and calibrated. Second device just outputs raw
- * data received from the hardware.
- *
- * This driver doesn't support filtering and dejittering; it supports only
- * calibration. Filtering and dejittering must be done in the low-level
- * driver, if needed, because it may gain additional benefits from knowing
- * the low-level details, the nature of noise and so on.
- *
- * The driver precomputes a calibration matrix given the initial xres and
- * yres values (quite innacurate for most touchscreens) that will result
- * in a more or less expected range of output values. The driver supports
- * the TS_SET_CAL ioctl, which will replace the calibration matrix with a
- * new one, supposedly generated from the values taken from the raw device.
- */
-
-MODULE_AUTHOR("James Simmons <jsimmons@transvirtual.com>");
-MODULE_DESCRIPTION("Input driver to touchscreen converter");
-MODULE_LICENSE("GPL");
-
-static int xres = CONFIG_INPUT_TSDEV_SCREEN_X;
-module_param(xres, uint, 0);
-MODULE_PARM_DESC(xres, "Horizontal screen resolution (can be negative for X-mirror)");
-
-static int yres = CONFIG_INPUT_TSDEV_SCREEN_Y;
-module_param(yres, uint, 0);
-MODULE_PARM_DESC(yres, "Vertical screen resolution (can be negative for Y-mirror)");
-
-/* From Compaq's Touch Screen Specification version 0.2 (draft) */
-struct ts_event {
- short pressure;
- short x;
- short y;
- short millisecs;
-};
-
-struct ts_calibration {
- int xscale;
- int xtrans;
- int yscale;
- int ytrans;
- int xyswap;
-};
-
-struct tsdev {
- int exist;
- int open;
- int minor;
- char name[8];
- struct input_handle handle;
- wait_queue_head_t wait;
- struct list_head client_list;
- struct device dev;
-
- int x, y, pressure;
- struct ts_calibration cal;
-};
-
-struct tsdev_client {
- struct fasync_struct *fasync;
- struct list_head node;
- struct tsdev *tsdev;
- int head, tail;
- struct ts_event event[TSDEV_BUFFER_SIZE];
- int raw;
-};
-
-/* The following ioctl codes are defined ONLY for backward compatibility.
- * Don't use tsdev for new developement; use the tslib library instead.
- * Touchscreen calibration is a fully userspace task.
- */
-/* Use 'f' as magic number */
-#define IOC_H3600_TS_MAGIC 'f'
-#define TS_GET_CAL _IOR(IOC_H3600_TS_MAGIC, 10, struct ts_calibration)
-#define TS_SET_CAL _IOW(IOC_H3600_TS_MAGIC, 11, struct ts_calibration)
-
-static struct tsdev *tsdev_table[TSDEV_MINORS/2];
-
-static int tsdev_fasync(int fd, struct file *file, int on)
-{
- struct tsdev_client *client = file->private_data;
- int retval;
-
- retval = fasync_helper(fd, file, on, &client->fasync);
- return retval < 0 ? retval : 0;
-}
-
-static int tsdev_open(struct inode *inode, struct file *file)
-{
- int i = iminor(inode) - TSDEV_MINOR_BASE;
- struct tsdev_client *client;
- struct tsdev *tsdev;
- int error;
-
- printk(KERN_WARNING "tsdev (compaq touchscreen emulation) is scheduled "
- "for removal.\nSee Documentation/feature-removal-schedule.txt "
- "for details.\n");
-
- if (i >= TSDEV_MINORS)
- return -ENODEV;
-
- tsdev = tsdev_table[i & TSDEV_MINOR_MASK];
- if (!tsdev || !tsdev->exist)
- return -ENODEV;
-
- get_device(&tsdev->dev);
-
- client = kzalloc(sizeof(struct tsdev_client), GFP_KERNEL);
- if (!client) {
- error = -ENOMEM;
- goto err_put_tsdev;
- }
-
- client->tsdev = tsdev;
- client->raw = (i >= TSDEV_MINORS / 2) ? 1 : 0;
- list_add_tail(&client->node, &tsdev->client_list);
-
- if (!tsdev->open++ && tsdev->exist) {
- error = input_open_device(&tsdev->handle);
- if (error)
- goto err_free_client;
- }
-
- file->private_data = client;
- return 0;
-
- err_free_client:
- list_del(&client->node);
- kfree(client);
- err_put_tsdev:
- put_device(&tsdev->dev);
- return error;
-}
-
-static void tsdev_free(struct device *dev)
-{
- struct tsdev *tsdev = container_of(dev, struct tsdev, dev);
-
- tsdev_table[tsdev->minor] = NULL;
- kfree(tsdev);
-}
-
-static int tsdev_release(struct inode *inode, struct file *file)
-{
- struct tsdev_client *client = file->private_data;
- struct tsdev *tsdev = client->tsdev;
-
- tsdev_fasync(-1, file, 0);
-
- list_del(&client->node);
- kfree(client);
-
- if (!--tsdev->open && tsdev->exist)
- input_close_device(&tsdev->handle);
-
- put_device(&tsdev->dev);
-
- return 0;
-}
-
-static ssize_t tsdev_read(struct file *file, char __user *buffer, size_t count,
- loff_t *ppos)
-{
- struct tsdev_client *client = file->private_data;
- struct tsdev *tsdev = client->tsdev;
- int retval = 0;
-
- if (client->head == client->tail && tsdev->exist && (file->f_flags & O_NONBLOCK))
- return -EAGAIN;
-
- retval = wait_event_interruptible(tsdev->wait,
- client->head != client->tail || !tsdev->exist);
- if (retval)
- return retval;
-
- if (!tsdev->exist)
- return -ENODEV;
-
- while (client->head != client->tail &&
- retval + sizeof (struct ts_event) <= count) {
- if (copy_to_user (buffer + retval, client->event + client->tail,
- sizeof (struct ts_event)))
- return -EFAULT;
- client->tail = (client->tail + 1) & (TSDEV_BUFFER_SIZE - 1);
- retval += sizeof (struct ts_event);
- }
-
- return retval;
-}
-
-/* No kernel lock - fine */
-static unsigned int tsdev_poll(struct file *file, poll_table *wait)
-{
- struct tsdev_client *client = file->private_data;
- struct tsdev *tsdev = client->tsdev;
-
- poll_wait(file, &tsdev->wait, wait);
- return ((client->head == client->tail) ? 0 : (POLLIN | POLLRDNORM)) |
- (tsdev->exist ? 0 : (POLLHUP | POLLERR));
-}
-
-static int tsdev_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct tsdev_client *client = file->private_data;
- struct tsdev *tsdev = client->tsdev;
- int retval = 0;
-
- switch (cmd) {
- case TS_GET_CAL:
- if (copy_to_user((void __user *)arg, &tsdev->cal,
- sizeof (struct ts_calibration)))
- retval = -EFAULT;
- break;
-
- case TS_SET_CAL:
- if (copy_from_user(&tsdev->cal, (void __user *)arg,
- sizeof (struct ts_calibration)))
- retval = -EFAULT;
- break;
-
- default:
- retval = -EINVAL;
- break;
- }
-
- return retval;
-}
-
-static const struct file_operations tsdev_fops = {
- .owner = THIS_MODULE,
- .open = tsdev_open,
- .release = tsdev_release,
- .read = tsdev_read,
- .poll = tsdev_poll,
- .fasync = tsdev_fasync,
- .ioctl = tsdev_ioctl,
-};
-
-static void tsdev_event(struct input_handle *handle, unsigned int type,
- unsigned int code, int value)
-{
- struct tsdev *tsdev = handle->private;
- struct tsdev_client *client;
- struct timeval time;
-
- switch (type) {
- case EV_ABS:
- switch (code) {
- case ABS_X:
- tsdev->x = value;
- break;
-
- case ABS_Y:
- tsdev->y = value;
- break;
-
- case ABS_PRESSURE:
- if (value > handle->dev->absmax[ABS_PRESSURE])
- value = handle->dev->absmax[ABS_PRESSURE];
- value -= handle->dev->absmin[ABS_PRESSURE];
- if (value < 0)
- value = 0;
- tsdev->pressure = value;
- break;
- }
- break;
-
- case EV_REL:
- switch (code) {
- case REL_X:
- tsdev->x += value;
- if (tsdev->x < 0)
- tsdev->x = 0;
- else if (tsdev->x > xres)
- tsdev->x = xres;
- break;
-
- case REL_Y:
- tsdev->y += value;
- if (tsdev->y < 0)
- tsdev->y = 0;
- else if (tsdev->y > yres)
- tsdev->y = yres;
- break;
- }
- break;
-
- case EV_KEY:
- if (code == BTN_TOUCH || code == BTN_MOUSE) {
- switch (value) {
- case 0:
- tsdev->pressure = 0;
- break;
-
- case 1:
- if (!tsdev->pressure)
- tsdev->pressure = 1;
- break;
- }
- }
- break;
- }
-
- if (type != EV_SYN || code != SYN_REPORT)
- return;
-
- list_for_each_entry(client, &tsdev->client_list, node) {
- int x, y, tmp;
-
- do_gettimeofday(&time);
- client->event[client->head].millisecs = time.tv_usec / 1000;
- client->event[client->head].pressure = tsdev->pressure;
-
- x = tsdev->x;
- y = tsdev->y;
-
- /* Calibration */
- if (!client->raw) {
- x = ((x * tsdev->cal.xscale) >> 8) + tsdev->cal.xtrans;
- y = ((y * tsdev->cal.yscale) >> 8) + tsdev->cal.ytrans;
- if (tsdev->cal.xyswap) {
- tmp = x; x = y; y = tmp;
- }
- }
-
- client->event[client->head].x = x;
- client->event[client->head].y = y;
- client->head = (client->head + 1) & (TSDEV_BUFFER_SIZE - 1);
- kill_fasync(&client->fasync, SIGIO, POLL_IN);
- }
- wake_up_interruptible(&tsdev->wait);
-}
-
-static int tsdev_connect(struct input_handler *handler, struct input_dev *dev,
- const struct input_device_id *id)
-{
- struct tsdev *tsdev;
- int minor, delta;
- int error;
-
- for (minor = 0; minor < TSDEV_MINORS / 2 && tsdev_table[minor]; minor++);
- if (minor >= TSDEV_MINORS / 2) {
- printk(KERN_ERR
- "tsdev: You have way too many touchscreens\n");
- return -ENFILE;
- }
-
- tsdev = kzalloc(sizeof(struct tsdev), GFP_KERNEL);
- if (!tsdev)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&tsdev->client_list);
- init_waitqueue_head(&tsdev->wait);
-
- tsdev->exist = 1;
- tsdev->minor = minor;
- tsdev->handle.dev = dev;
- tsdev->handle.name = tsdev->name;
- tsdev->handle.handler = handler;
- tsdev->handle.private = tsdev;
- snprintf(tsdev->name, sizeof(tsdev->name), "ts%d", minor);
-
- /* Precompute the rough calibration matrix */
- delta = dev->absmax [ABS_X] - dev->absmin [ABS_X] + 1;
- if (delta == 0)
- delta = 1;
- tsdev->cal.xscale = (xres << 8) / delta;
- tsdev->cal.xtrans = - ((dev->absmin [ABS_X] * tsdev->cal.xscale) >> 8);
-
- delta = dev->absmax [ABS_Y] - dev->absmin [ABS_Y] + 1;
- if (delta == 0)
- delta = 1;
- tsdev->cal.yscale = (yres << 8) / delta;
- tsdev->cal.ytrans = - ((dev->absmin [ABS_Y] * tsdev->cal.yscale) >> 8);
-
- snprintf(tsdev->dev.bus_id, sizeof(tsdev->dev.bus_id),
- "ts%d", minor);
- tsdev->dev.class = &input_class;
- tsdev->dev.parent = &dev->dev;
- tsdev->dev.devt = MKDEV(INPUT_MAJOR, TSDEV_MINOR_BASE + minor);
- tsdev->dev.release = tsdev_free;
- device_initialize(&tsdev->dev);
-
- tsdev_table[minor] = tsdev;
-
- error = device_add(&tsdev->dev);
- if (error)
- goto err_free_tsdev;
-
- error = input_register_handle(&tsdev->handle);
- if (error)
- goto err_delete_tsdev;
-
- return 0;
-
- err_delete_tsdev:
- device_del(&tsdev->dev);
- err_free_tsdev:
- put_device(&tsdev->dev);
- return error;
-}
-
-static void tsdev_disconnect(struct input_handle *handle)
-{
- struct tsdev *tsdev = handle->private;
- struct tsdev_client *client;
-
- input_unregister_handle(handle);
- device_del(&tsdev->dev);
-
- tsdev->exist = 0;
-
- if (tsdev->open) {
- input_close_device(handle);
- list_for_each_entry(client, &tsdev->client_list, node)
- kill_fasync(&client->fasync, SIGIO, POLL_HUP);
- wake_up_interruptible(&tsdev->wait);
- }
-
- put_device(&tsdev->dev);
-}
-
-static const struct input_device_id tsdev_ids[] = {
- {
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT,
- .evbit = { BIT(EV_KEY) | BIT(EV_REL) },
- .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
- .relbit = { BIT(REL_X) | BIT(REL_Y) },
- }, /* A mouse like device, at least one button, two relative axes */
-
- {
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
- .evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
- .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
- .absbit = { BIT(ABS_X) | BIT(ABS_Y) },
- }, /* A tablet like device, at least touch detection, two absolute axes */
-
- {
- .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT,
- .evbit = { BIT(EV_ABS) },
- .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) },
- }, /* A tablet like device with several gradations of pressure */
-
- {} /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE(input, tsdev_ids);
-
-static struct input_handler tsdev_handler = {
- .event = tsdev_event,
- .connect = tsdev_connect,
- .disconnect = tsdev_disconnect,
- .fops = &tsdev_fops,
- .minor = TSDEV_MINOR_BASE,
- .name = "tsdev",
- .id_table = tsdev_ids,
-};
-
-static int __init tsdev_init(void)
-{
- return input_register_handler(&tsdev_handler);
-}
-
-static void __exit tsdev_exit(void)
-{
- input_unregister_handler(&tsdev_handler);
-}
-
-module_init(tsdev_init);
-module_exit(tsdev_exit);
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 7c9cb7e..b39d1f5 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -328,7 +328,7 @@
l->cps = (l->transcount * HZ) / (jiffies - last_jiffies);
l->transcount = 0;
if (dev->net_verbose > 3)
- printk(KERN_DEBUG "%s: %d bogocps\n", l->name, l->cps);
+ printk(KERN_DEBUG "%s: %d bogocps\n", p->dev->name, l->cps);
if ((l->flags & ISDN_NET_CONNECTED) && (!l->dialstate)) {
anymore = 1;
l->huptimer++;
@@ -350,12 +350,12 @@
if (l->hupflags & ISDN_CHARGEHUP) {
if (l->hupflags & ISDN_WAITCHARGE) {
printk(KERN_DEBUG "isdn_net: Hupflags of %s are %X\n",
- l->name, l->hupflags);
+ p->dev->name, l->hupflags);
isdn_net_hangup(p->dev);
} else if (time_after(jiffies, l->chargetime + l->chargeint)) {
printk(KERN_DEBUG
"isdn_net: %s: chtime = %lu, chint = %d\n",
- l->name, l->chargetime, l->chargeint);
+ p->dev->name, l->chargetime, l->chargeint);
isdn_net_hangup(p->dev);
}
} else
@@ -442,8 +442,8 @@
#endif
isdn_net_lp_disconnected(lp);
isdn_all_eaz(lp->isdn_device, lp->isdn_channel);
- printk(KERN_INFO "%s: remote hangup\n", lp->name);
- printk(KERN_INFO "%s: Chargesum is %d\n", lp->name,
+ printk(KERN_INFO "%s: remote hangup\n", p->dev->name);
+ printk(KERN_INFO "%s: Chargesum is %d\n", p->dev->name,
lp->charge);
isdn_net_unbind_channel(lp);
return 1;
@@ -487,7 +487,7 @@
isdn_net_add_to_bundle(nd, lp);
}
}
- printk(KERN_INFO "isdn_net: %s connected\n", lp->name);
+ printk(KERN_INFO "isdn_net: %s connected\n", p->dev->name);
/* If first Chargeinfo comes before B-Channel connect,
* we correct the timestamp here.
*/
@@ -534,7 +534,7 @@
lp->hupflags |= ISDN_HAVECHARGE;
lp->chargetime = jiffies;
printk(KERN_DEBUG "isdn_net: Got CINF chargetime of %s now %lu\n",
- lp->name, lp->chargetime);
+ p->dev->name, lp->chargetime);
return 1;
}
}
@@ -565,7 +565,7 @@
#ifdef ISDN_DEBUG_NET_DIAL
if (lp->dialstate)
- printk(KERN_DEBUG "%s: dialstate=%d\n", lp->name, lp->dialstate);
+ printk(KERN_DEBUG "%s: dialstate=%d\n", p->dev->name, lp->dialstate);
#endif
switch (lp->dialstate) {
case 0:
@@ -578,7 +578,7 @@
lp->dial = lp->phone[1];
if (!lp->dial) {
printk(KERN_WARNING "%s: phone number deleted?\n",
- lp->name);
+ p->dev->name);
isdn_net_hangup(p->dev);
break;
}
@@ -632,13 +632,13 @@
cmd.arg = lp->isdn_channel;
if (!lp->dial) {
printk(KERN_WARNING "%s: phone number deleted?\n",
- lp->name);
+ p->dev->name);
isdn_net_hangup(p->dev);
break;
}
if (!strncmp(lp->dial->num, "LEASED", strlen("LEASED"))) {
lp->dialstate = 4;
- printk(KERN_INFO "%s: Open leased line ...\n", lp->name);
+ printk(KERN_INFO "%s: Open leased line ...\n", p->dev->name);
} else {
if(lp->dialtimeout > 0)
if (time_after(jiffies, lp->dialstarted + lp->dialtimeout)) {
@@ -688,7 +688,7 @@
dev->usage[i] |= ISDN_USAGE_OUTGOING;
isdn_info_update();
}
- printk(KERN_INFO "%s: dialing %d %s... %s\n", lp->name,
+ printk(KERN_INFO "%s: dialing %d %s... %s\n", p->dev->name,
lp->dialretry, cmd.parm.setup.phone,
(cmd.parm.setup.si1 == 1) ? "DOV" : "");
lp->dtimer = 0;
@@ -797,7 +797,7 @@
*/
if (lp->dtimer++ > lp->cbdelay)
{
- printk(KERN_INFO "%s: hangup waiting for callback ...\n", lp->name);
+ printk(KERN_INFO "%s: hangup waiting for callback ...\n", p->dev->name);
lp->dtimer = 0;
lp->dialstate = 4;
cmd.driver = lp->isdn_device;
@@ -810,7 +810,7 @@
break;
default:
printk(KERN_WARNING "isdn_net: Illegal dialstate %d for device %s\n",
- lp->dialstate, lp->name);
+ lp->dialstate, p->dev->name);
}
p = (isdn_net_dev *) p->next;
}
@@ -836,11 +836,11 @@
if (slp->flags & ISDN_NET_CONNECTED) {
printk(KERN_INFO
"isdn_net: hang up slave %s before %s\n",
- slp->name, lp->name);
+ lp->slave->name, d->name);
isdn_net_hangup(lp->slave);
}
}
- printk(KERN_INFO "isdn_net: local hangup %s\n", lp->name);
+ printk(KERN_INFO "isdn_net: local hangup %s\n", d->name);
#ifdef CONFIG_ISDN_PPP
if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP)
isdn_ppp_free(lp);
@@ -858,7 +858,7 @@
cmd.command = ISDN_CMD_HANGUP;
cmd.arg = lp->isdn_channel;
isdn_command(&cmd);
- printk(KERN_INFO "%s: Chargesum is %d\n", lp->name, lp->charge);
+ printk(KERN_INFO "%s: Chargesum is %d\n", d->name, lp->charge);
isdn_all_eaz(lp->isdn_device, lp->isdn_channel);
}
isdn_net_unbind_channel(lp);
@@ -885,7 +885,7 @@
/* fall back to old isdn_net_log_packet method() */
char * buf = skb->data;
- printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->name);
+ printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->netdev->dev->name);
p = buf;
proto = ETH_P_IP;
switch (lp->p_encap) {
@@ -1023,7 +1023,7 @@
ret = isdn_writebuf_skb_stub(lp->isdn_device, lp->isdn_channel, 1, skb);
if (ret != len) {
/* we should never get here */
- printk(KERN_WARNING "%s: HL driver queue full\n", lp->name);
+ printk(KERN_WARNING "%s: HL driver queue full\n", lp->netdev->dev->name);
goto error;
}
@@ -1461,7 +1461,7 @@
mod_timer(&lp->cisco_timer, expires);
printk(KERN_INFO "%s: Keepalive period set "
"to %d seconds.\n",
- lp->name, lp->cisco_keepalive_period);
+ dev->name, lp->cisco_keepalive_period);
}
break;
@@ -1512,7 +1512,7 @@
lp->cisco_line_state = 0;
printk (KERN_WARNING
"UPDOWN: Line protocol on Interface %s,"
- " changed state to down\n", lp->name);
+ " changed state to down\n", lp->netdev->dev->name);
/* should stop routing higher-level data accross */
} else if ((!lp->cisco_line_state) &&
(myseq_diff >= 0) && (myseq_diff <= 2)) {
@@ -1520,14 +1520,14 @@
lp->cisco_line_state = 1;
printk (KERN_WARNING
"UPDOWN: Line protocol on Interface %s,"
- " changed state to up\n", lp->name);
+ " changed state to up\n", lp->netdev->dev->name);
/* restart routing higher-level data accross */
}
if (lp->cisco_debserint)
printk (KERN_DEBUG "%s: HDLC "
"myseq %lu, mineseen %lu%c, yourseen %lu, %s\n",
- lp->name, last_cisco_myseq, lp->cisco_mineseen,
+ lp->netdev->dev->name, last_cisco_myseq, lp->cisco_mineseen,
((last_cisco_myseq == lp->cisco_mineseen) ? '*' : 040),
lp->cisco_yourseq,
((lp->cisco_line_state) ? "line up" : "line down"));
@@ -1682,7 +1682,7 @@
"remote ip: %d.%d.%d.%d, "
"local ip: %d.%d.%d.%d "
"mask: %d.%d.%d.%d\n",
- lp->name,
+ lp->netdev->dev->name,
HIPQUAD(addr),
HIPQUAD(local),
HIPQUAD(mask));
@@ -1690,7 +1690,7 @@
slarp_reply_out:
printk(KERN_INFO "%s: got invalid slarp "
"reply (%d.%d.%d.%d/%d.%d.%d.%d) "
- "- ignored\n", lp->name,
+ "- ignored\n", lp->netdev->dev->name,
HIPQUAD(addr), HIPQUAD(mask));
break;
case CISCO_SLARP_KEEPALIVE:
@@ -1701,7 +1701,8 @@
lp->cisco_last_slarp_in) {
printk(KERN_DEBUG "%s: Keepalive period mismatch - "
"is %d but should be %d.\n",
- lp->name, period, lp->cisco_keepalive_period);
+ lp->netdev->dev->name, period,
+ lp->cisco_keepalive_period);
}
lp->cisco_last_slarp_in = jiffies;
p += get_u32(p, &my_seq);
@@ -1732,12 +1733,12 @@
if (addr != CISCO_ADDR_UNICAST && addr != CISCO_ADDR_BROADCAST) {
printk(KERN_WARNING "%s: Unknown Cisco addr 0x%02x\n",
- lp->name, addr);
+ lp->netdev->dev->name, addr);
goto out_free;
}
if (ctrl != CISCO_CTRL) {
printk(KERN_WARNING "%s: Unknown Cisco ctrl 0x%02x\n",
- lp->name, ctrl);
+ lp->netdev->dev->name, ctrl);
goto out_free;
}
@@ -1748,7 +1749,8 @@
case CISCO_TYPE_CDP:
if (lp->cisco_debserint)
printk(KERN_DEBUG "%s: Received CDP packet. use "
- "\"no cdp enable\" on cisco.\n", lp->name);
+ "\"no cdp enable\" on cisco.\n",
+ lp->netdev->dev->name);
goto out_free;
default:
/* no special cisco protocol */
@@ -1843,7 +1845,7 @@
};
#endif /* CONFIG_ISDN_X25 */
printk(KERN_WARNING "%s: unknown encapsulation, dropping\n",
- lp->name);
+ lp->netdev->dev->name);
kfree_skb(skb);
return;
}
@@ -2174,7 +2176,7 @@
wret = matchret;
#ifdef ISDN_DEBUG_NET_ICALL
printk(KERN_DEBUG "n_fi: if='%s', l.msn=%s, l.flags=%d, l.dstate=%d\n",
- lp->name, lp->msn, lp->flags, lp->dialstate);
+ p->dev->name, lp->msn, lp->flags, lp->dialstate);
#endif
if ((!matchret) && /* EAZ is matching */
(((!(lp->flags & ISDN_NET_CONNECTED)) && /* but not connected */
@@ -2277,7 +2279,7 @@
* */
if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) {
printk(KERN_INFO "incoming call, interface %s `stopped' -> rejected\n",
- lp->name);
+ p->dev->name);
return 3;
}
/*
@@ -2286,7 +2288,7 @@
*/
if (!isdn_net_device_started(p)) {
printk(KERN_INFO "%s: incoming call, interface down -> rejected\n",
- lp->name);
+ p->dev->name);
return 3;
}
/* Interface is up, now see if it's a slave. If so, see if
@@ -2294,8 +2296,8 @@
*/
if (lp->master) {
isdn_net_local *mlp = (isdn_net_local *) lp->master->priv;
- printk(KERN_DEBUG "ICALLslv: %s\n", lp->name);
- printk(KERN_DEBUG "master=%s\n", mlp->name);
+ printk(KERN_DEBUG "ICALLslv: %s\n", p->dev->name);
+ printk(KERN_DEBUG "master=%s\n", lp->master->name);
if (mlp->flags & ISDN_NET_CONNECTED) {
printk(KERN_DEBUG "master online\n");
/* Master is online, find parent-slave (master if first slave) */
@@ -2322,11 +2324,11 @@
* */
if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) {
printk(KERN_INFO "incoming call for callback, interface %s `off' -> rejected\n",
- lp->name);
+ p->dev->name);
return 3;
}
printk(KERN_DEBUG "%s: call from %s -> %s, start callback\n",
- lp->name, nr, eaz);
+ p->dev->name, nr, eaz);
if (lp->phone[1]) {
/* Grab a free ISDN-Channel */
spin_lock_irqsave(&dev->lock, flags);
@@ -2340,7 +2342,8 @@
lp->msn)
) < 0) {
- printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", lp->name);
+ printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n",
+ p->dev->name);
spin_unlock_irqrestore(&dev->lock, flags);
return 0;
}
@@ -2361,11 +2364,12 @@
/* Initiate dialing by returning 2 or 4 */
return (lp->flags & ISDN_NET_CBHUP) ? 2 : 4;
} else
- printk(KERN_WARNING "isdn_net: %s: No phone number\n", lp->name);
+ printk(KERN_WARNING "isdn_net: %s: No phone number\n",
+ p->dev->name);
return 0;
} else {
- printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", lp->name, nr,
- eaz);
+ printk(KERN_DEBUG "%s: call from %s -> %s accepted\n",
+ p->dev->name, nr, eaz);
/* if this interface is dialing, it does it probably on a different
device, so free this device */
if ((lp->dialstate == 4) || (lp->dialstate == 12)) {
@@ -2424,7 +2428,7 @@
isdn_net_dev *p = dev->netdev;
while (p) {
- if (!strcmp(p->local->name, name))
+ if (!strcmp(p->dev->name, name))
return p;
p = (isdn_net_dev *) p->next;
}
@@ -2453,7 +2457,8 @@
lp->pre_device,
lp->pre_channel,
lp->msn)) < 0) {
- printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", lp->name);
+ printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n",
+ lp->netdev->dev->name);
spin_unlock_irqrestore(&dev->lock, flags);
return -EAGAIN;
}
@@ -2556,7 +2561,7 @@
return NULL;
}
if (name == NULL)
- name = " ";
+ return NULL;
if (!(netdev = kzalloc(sizeof(isdn_net_dev), GFP_KERNEL))) {
printk(KERN_WARNING "isdn_net: Could not allocate net-device\n");
return NULL;
@@ -2568,7 +2573,6 @@
return NULL;
}
netdev->local = netdev->dev->priv;
- strcpy(netdev->local->name, netdev->dev->name);
netdev->dev->init = isdn_net_init;
if (master) {
/* Device shall be a slave */
@@ -2673,7 +2677,7 @@
#endif
if (isdn_net_device_started(p)) {
printk(KERN_WARNING "%s: cannot change encap when if is up\n",
- lp->name);
+ p->dev->name);
return -EBUSY;
}
#ifdef CONFIG_ISDN_X25
@@ -2698,7 +2702,7 @@
case ISDN_NET_ENCAP_SYNCPPP:
#ifndef CONFIG_ISDN_PPP
printk(KERN_WARNING "%s: SyncPPP support not configured\n",
- lp->name);
+ p->dev->name);
return -EINVAL;
#else
p->dev->type = ARPHRD_PPP; /* change ARP type */
@@ -2709,7 +2713,7 @@
case ISDN_NET_ENCAP_X25IFACE:
#ifndef CONFIG_ISDN_X25
printk(KERN_WARNING "%s: isdn-x25 support not configured\n",
- p->local->name);
+ p->dev->name);
return -EINVAL;
#else
p->dev->type = ARPHRD_X25; /* change ARP type */
@@ -2725,7 +2729,7 @@
break;
printk(KERN_WARNING
"%s: encapsulation protocol %d not supported\n",
- p->local->name, cfg->p_encap);
+ p->dev->name, cfg->p_encap);
return -EINVAL;
}
if (strlen(cfg->drvid)) {
@@ -2902,13 +2906,18 @@
cfg->pppbind = lp->pppbind;
cfg->dialtimeout = lp->dialtimeout >= 0 ? lp->dialtimeout / HZ : -1;
cfg->dialwait = lp->dialwait / HZ;
- if (lp->slave)
- strcpy(cfg->slave, ((isdn_net_local *) lp->slave->priv)->name);
- else
+ if (lp->slave) {
+ if (strlen(lp->slave->name) > 8)
+ strcpy(cfg->slave, "too-long");
+ else
+ strcpy(cfg->slave, lp->slave->name);
+ } else
cfg->slave[0] = '\0';
- if (lp->master)
- strcpy(cfg->master, ((isdn_net_local *) lp->master->priv)->name);
- else
+ if (lp->master) {
+ if (strlen(lp->master->name) > 8)
+ strcpy(cfg->master, "too-long");
+ strcpy(cfg->master, lp->master->name);
+ } else
cfg->master[0] = '\0';
return 0;
}
@@ -2978,7 +2987,8 @@
isdn_net_dev *p = isdn_net_findif(phone->name);
int ch, dv, idx;
- if (!p) return -ENODEV;
+ if (!p)
+ return -ENODEV;
/*
* Theoretical race: while this executes, the remote number might
* become invalid (hang up) or change (new connection), resulting
@@ -2987,14 +2997,18 @@
*/
ch = p->local->isdn_channel;
dv = p->local->isdn_device;
- if(ch<0 && dv<0) return -ENOTCONN;
+ if(ch < 0 && dv < 0)
+ return -ENOTCONN;
idx = isdn_dc2minor(dv, ch);
- if (idx<0) return -ENODEV;
+ if (idx <0 )
+ return -ENODEV;
/* for pre-bound channels, we need this extra check */
- if ( strncmp(dev->num[idx],"???",3) == 0 ) return -ENOTCONN;
- strncpy(phone->phone,dev->num[idx],ISDN_MSNLEN);
- phone->outgoing=USG_OUTGOING(dev->usage[idx]);
- if ( copy_to_user(peer,phone,sizeof(*peer)) ) return -EFAULT;
+ if (strncmp(dev->num[idx], "???", 3) == 0)
+ return -ENOTCONN;
+ strncpy(phone->phone, dev->num[idx], ISDN_MSNLEN);
+ phone->outgoing = USG_OUTGOING(dev->usage[idx]);
+ if (copy_to_user(peer, phone, sizeof(*peer)))
+ return -EFAULT;
return 0;
}
/*
@@ -3113,18 +3127,18 @@
dev->netdev = p->next;
if (p->local->slave) {
/* If this interface has a slave, remove it also */
- char *slavename = ((isdn_net_local *) (p->local->slave->priv))->name;
+ char *slavename = p->local->slave->name;
isdn_net_dev *n = dev->netdev;
q = NULL;
while (n) {
- if (!strcmp(n->local->name, slavename)) {
+ if (!strcmp(n->dev->name, slavename)) {
spin_unlock_irqrestore(&dev->lock, flags);
isdn_net_realrm(n, q);
spin_lock_irqsave(&dev->lock, flags);
break;
}
q = n;
- n = (isdn_net_dev *) n->next;
+ n = (isdn_net_dev *)n->next;
}
}
spin_unlock_irqrestore(&dev->lock, flags);
@@ -3152,7 +3166,7 @@
p = dev->netdev;
q = NULL;
while (p) {
- if (!strcmp(p->local->name, name)) {
+ if (!strcmp(p->dev->name, name)) {
spin_unlock_irqrestore(&dev->lock, flags);
return (isdn_net_realrm(p, q));
}
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 0e5e59f..9f5fe37 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -190,9 +190,11 @@
retval = -1;
goto out;
}
- unit = isdn_ppp_if_get_unit(lp->name); /* get unit number from interface name .. ugly! */
+ /* get unit number from interface name .. ugly! */
+ unit = isdn_ppp_if_get_unit(lp->netdev->dev->name);
if (unit < 0) {
- printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", lp->name);
+ printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n",
+ lp->netdev->dev->name);
retval = -1;
goto out;
}
@@ -507,7 +509,8 @@
case PPPIOCGIFNAME:
if(!lp)
return -EINVAL;
- if ((r = set_arg(argp, lp->name, strlen(lp->name))))
+ if ((r = set_arg(argp, lp->netdev->dev->name,
+ strlen(lp->netdev->dev->name))))
return r;
break;
case PPPIOCGMPFLAGS: /* get configuration flags */
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 56cd899..77f50b6 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -172,6 +172,7 @@
config MAC_EMUMOUSEBTN
bool "Support for mouse button 2+3 emulation"
+ select INPUT
help
This provides generic support for emulating the 2nd and 3rd mouse
button with keypresses. If you say Y here, the emulation is still
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index 48d17bf..8cce016 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -52,6 +52,11 @@
MODULE_AUTHOR("Franz Sirl <Franz.Sirl-kernel@lauterbach.com>");
+static int restore_capslock_events;
+module_param(restore_capslock_events, int, 0644);
+MODULE_PARM_DESC(restore_capslock_events,
+ "Produce keypress events for capslock on both keyup and keydown.");
+
#define KEYB_KEYREG 0 /* register # for key up/down data */
#define KEYB_LEDREG 2 /* register # for leds on ADB keyboard */
#define MOUSE_DATAREG 0 /* reg# for movement/button codes from mouse */
@@ -217,6 +222,8 @@
#define FLAG_FN_KEY_PRESSED 0x00000001
#define FLAG_POWER_FROM_FN 0x00000002
#define FLAG_EMU_FWDEL_DOWN 0x00000004
+#define FLAG_CAPSLOCK_TRANSLATE 0x00000008
+#define FLAG_CAPSLOCK_DOWN 0x00000010
static struct adbhid *adbhid[16];
@@ -272,19 +279,50 @@
}
static void
-adbhid_input_keycode(int id, int keycode, int repeat)
+adbhid_input_keycode(int id, int scancode, int repeat)
{
struct adbhid *ahid = adbhid[id];
- int up_flag, key;
+ int keycode, up_flag;
- up_flag = (keycode & 0x80);
- keycode &= 0x7f;
+ keycode = scancode & 0x7f;
+ up_flag = scancode & 0x80;
+
+ if (restore_capslock_events) {
+ if (keycode == ADB_KEY_CAPSLOCK && !up_flag) {
+ /* Key pressed, turning on the CapsLock LED.
+ * The next 0xff will be interpreted as a release. */
+ ahid->flags |= FLAG_CAPSLOCK_TRANSLATE
+ | FLAG_CAPSLOCK_DOWN;
+ } else if (scancode == 0xff) {
+ /* Scancode 0xff usually signifies that the capslock
+ * key was either pressed or released. */
+ if (ahid->flags & FLAG_CAPSLOCK_TRANSLATE) {
+ keycode = ADB_KEY_CAPSLOCK;
+ if (ahid->flags & FLAG_CAPSLOCK_DOWN) {
+ /* Key released */
+ up_flag = 1;
+ ahid->flags &= ~FLAG_CAPSLOCK_DOWN;
+ } else {
+ /* Key pressed */
+ up_flag = 0;
+ ahid->flags &= ~FLAG_CAPSLOCK_TRANSLATE;
+ }
+ } else {
+ printk(KERN_INFO "Spurious caps lock event "
+ "(scancode 0xff).");
+ }
+ }
+ }
switch (keycode) {
- case ADB_KEY_CAPSLOCK: /* Generate down/up events for CapsLock everytime. */
- input_report_key(ahid->input, KEY_CAPSLOCK, 1);
- input_report_key(ahid->input, KEY_CAPSLOCK, 0);
- input_sync(ahid->input);
+ case ADB_KEY_CAPSLOCK:
+ if (!restore_capslock_events) {
+ /* Generate down/up events for CapsLock everytime. */
+ input_report_key(ahid->input, KEY_CAPSLOCK, 1);
+ input_sync(ahid->input);
+ input_report_key(ahid->input, KEY_CAPSLOCK, 0);
+ input_sync(ahid->input);
+ }
return;
#ifdef CONFIG_PPC_PMAC
case ADB_KEY_POWER_OLD: /* Power key on PBook 3400 needs remapping */
@@ -296,7 +334,7 @@
keycode = ADB_KEY_POWER;
}
break;
- case ADB_KEY_POWER:
+ case ADB_KEY_POWER:
/* Fn + Command will produce a bogus "power" keycode */
if (ahid->flags & FLAG_FN_KEY_PRESSED) {
keycode = ADB_KEY_CMD;
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 216948d..81e068f 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -945,15 +945,15 @@
KEY_UNKNOWN, /* 0x0C: FN+BACKSPACE */
KEY_UNKNOWN, /* 0x0D: FN+INSERT */
KEY_UNKNOWN, /* 0x0E: FN+DELETE */
- KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */
+ KEY_BRIGHTNESSUP, /* 0x0F: FN+HOME (brightness up) */
/* Scan codes 0x10 to 0x1F: Extended ACPI HKEY hot keys */
- KEY_RESERVED, /* 0x10: FN+END (brightness down) */
+ KEY_BRIGHTNESSDOWN, /* 0x10: FN+END (brightness down) */
KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */
KEY_UNKNOWN, /* 0x12: FN+PGDOWN */
KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */
- KEY_RESERVED, /* 0x14: VOLUME UP */
- KEY_RESERVED, /* 0x15: VOLUME DOWN */
- KEY_RESERVED, /* 0x16: MUTE */
+ KEY_VOLUMEUP, /* 0x14: VOLUME UP */
+ KEY_VOLUMEDOWN, /* 0x15: VOLUME DOWN */
+ KEY_MUTE, /* 0x16: MUTE */
KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */
/* (assignments unknown, please report if found) */
KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
@@ -974,9 +974,9 @@
KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */
KEY_UNKNOWN, /* 0x12: FN+PGDOWN */
KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */
- KEY_RESERVED, /* 0x14: VOLUME UP */
- KEY_RESERVED, /* 0x15: VOLUME DOWN */
- KEY_RESERVED, /* 0x16: MUTE */
+ KEY_VOLUMEUP, /* 0x14: VOLUME UP */
+ KEY_VOLUMEDOWN, /* 0x15: VOLUME DOWN */
+ KEY_MUTE, /* 0x16: MUTE */
KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */
/* (assignments unknown, please report if found) */
KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index cb933ac..8211329 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -14,20 +14,20 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/map.h>
#include <linux/mtd/partitions.h>
#include <asm/io.h>
#include <asm/hardware.h>
+#include <asm/cacheflush.h>
#include <asm/mach/flash.h>
static void pxa2xx_map_inval_cache(struct map_info *map, unsigned long from,
ssize_t len)
{
- consistent_sync((char *)map->cached + from, len, DMA_FROM_DEVICE);
+ flush_ioremap_region(map->phys, map->cached, from, len);
}
struct pxa2xx_flash_info {
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9c635a2..8f99a06 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1780,6 +1780,15 @@
To compile this driver as a module, choose M here: the module
will be called sc92031. This is recommended.
+config CPMAC
+ tristate "TI AR7 CPMAC Ethernet support (EXPERIMENTAL)"
+ depends on NET_ETHERNET && EXPERIMENTAL && AR7
+ select PHYLIB
+ select FIXED_PHY
+ select FIXED_MII_100_FDX
+ help
+ TI AR7 CPMAC Ethernet support
+
config NET_POCKET
bool "Pocket and portable adapters"
depends on PARPORT
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index d2e0f35..22f78cb 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -159,6 +159,7 @@
obj-$(CONFIG_8139TOO) += 8139too.o
obj-$(CONFIG_ZNET) += znet.o
obj-$(CONFIG_LAN_SAA9730) += saa9730.o
+obj-$(CONFIG_CPMAC) += cpmac.o
obj-$(CONFIG_DEPCA) += depca.o
obj-$(CONFIG_EWRK3) += ewrk3.o
obj-$(CONFIG_ATP) += atp.o
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index b46c5d8..185f98e 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -54,13 +54,16 @@
#include <linux/delay.h>
#include <linux/crc32.h>
#include <linux/phy.h>
+
+#include <asm/cpu.h>
#include <asm/mipsregs.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/processor.h>
-#include <asm/mach-au1x00/au1000.h>
-#include <asm/cpu.h>
+#include <au1000.h>
+#include <prom.h>
+
#include "au1000_eth.h"
#ifdef AU1000_ETH_DEBUG
@@ -96,11 +99,6 @@
static void au1000_adjust_link(struct net_device *);
static void enable_mac(struct net_device *, int);
-// externs
-extern int get_ethernet_addr(char *ethernet_addr);
-extern void str2eaddr(unsigned char *ea, unsigned char *str);
-extern char * prom_getcmdline(void);
-
/*
* Theory of operation
*
@@ -619,7 +617,6 @@
struct au1000_private *aup = NULL;
struct net_device *dev = NULL;
db_dest_t *pDB, *pDBfree;
- char *pmac, *argptr;
char ethaddr[6];
int irq, i, err;
u32 base, macen;
@@ -677,21 +674,12 @@
au_macs[port_num] = aup;
if (port_num == 0) {
- /* Check the environment variables first */
- if (get_ethernet_addr(ethaddr) == 0)
+ if (prom_get_ethernet_addr(ethaddr) == 0)
memcpy(au1000_mac_addr, ethaddr, sizeof(au1000_mac_addr));
else {
- /* Check command line */
- argptr = prom_getcmdline();
- if ((pmac = strstr(argptr, "ethaddr=")) == NULL)
- printk(KERN_INFO "%s: No MAC address found\n",
- dev->name);
+ printk(KERN_INFO "%s: No MAC address found\n",
+ dev->name);
/* Use the hard coded MAC addresses */
- else {
- str2eaddr(ethaddr, pmac + strlen("ethaddr="));
- memcpy(au1000_mac_addr, ethaddr,
- sizeof(au1000_mac_addr));
- }
}
setup_hw_rings(aup, MAC0_RX_DMA_ADDR, MAC0_TX_DMA_ADDR);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 64bfec3..db80f24 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -98,6 +98,7 @@
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
static char *arp_validate = NULL;
+static int fail_over_mac = 0;
struct bond_params bonding_defaults;
module_param(max_bonds, int, 0);
@@ -131,6 +132,8 @@
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
+module_param(fail_over_mac, int, 0);
+MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on.");
/*----------------------------- Global variables ----------------------------*/
@@ -1096,7 +1099,21 @@
if (new_active) {
bond_set_slave_active_flags(new_active);
}
- bond_send_gratuitous_arp(bond);
+
+ /* when bonding does not set the slave MAC address, the bond MAC
+ * address is the one of the active slave.
+ */
+ if (new_active && bond->params.fail_over_mac)
+ memcpy(bond->dev->dev_addr, new_active->dev->dev_addr,
+ new_active->dev->addr_len);
+ if (bond->curr_active_slave &&
+ test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &bond->curr_active_slave->dev->state)) {
+ dprintk("delaying gratuitous arp on %s\n",
+ bond->curr_active_slave->dev->name);
+ bond->send_grat_arp = 1;
+ } else
+ bond_send_gratuitous_arp(bond);
}
}
@@ -1217,7 +1234,8 @@
struct slave *slave;
struct net_device *bond_dev = bond->dev;
unsigned long features = bond_dev->features;
- unsigned short max_hard_header_len = ETH_HLEN;
+ unsigned short max_hard_header_len = max((u16)ETH_HLEN,
+ bond_dev->hard_header_len);
int i;
features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
@@ -1238,6 +1256,23 @@
return 0;
}
+
+static void bond_setup_by_slave(struct net_device *bond_dev,
+ struct net_device *slave_dev)
+{
+ struct bonding *bond = bond_dev->priv;
+
+ bond_dev->neigh_setup = slave_dev->neigh_setup;
+
+ bond_dev->type = slave_dev->type;
+ bond_dev->hard_header_len = slave_dev->hard_header_len;
+ bond_dev->addr_len = slave_dev->addr_len;
+
+ memcpy(bond_dev->broadcast, slave_dev->broadcast,
+ slave_dev->addr_len);
+ bond->setup_by_slave = 1;
+}
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
{
@@ -1258,8 +1293,9 @@
/* bond must be initialized by bond_open() before enslaving */
if (!(bond_dev->flags & IFF_UP)) {
- dprintk("Error, master_dev is not up\n");
- return -EPERM;
+ printk(KERN_WARNING DRV_NAME
+ " %s: master_dev is not up in bond_enslave\n",
+ bond_dev->name);
}
/* already enslaved */
@@ -1312,14 +1348,42 @@
goto err_undo_flags;
}
+ /* set bonding device ether type by slave - bonding netdevices are
+ * created with ether_setup, so when the slave type is not ARPHRD_ETHER
+ * there is a need to override some of the type dependent attribs/funcs.
+ *
+ * bond ether type mutual exclusion - don't allow slaves of dissimilar
+ * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
+ */
+ if (bond->slave_cnt == 0) {
+ if (slave_dev->type != ARPHRD_ETHER)
+ bond_setup_by_slave(bond_dev, slave_dev);
+ } else if (bond_dev->type != slave_dev->type) {
+ printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different "
+ "from other slaves (%d), can not enslave it.\n",
+ slave_dev->name,
+ slave_dev->type, bond_dev->type);
+ res = -EINVAL;
+ goto err_undo_flags;
+ }
+
if (slave_dev->set_mac_address == NULL) {
- printk(KERN_ERR DRV_NAME
- ": %s: Error: The slave device you specified does "
- "not support setting the MAC address. "
- "Your kernel likely does not support slave "
- "devices.\n", bond_dev->name);
- res = -EOPNOTSUPP;
- goto err_undo_flags;
+ if (bond->slave_cnt == 0) {
+ printk(KERN_WARNING DRV_NAME
+ ": %s: Warning: The first slave device "
+ "specified does not support setting the MAC "
+ "address. Enabling the fail_over_mac option.",
+ bond_dev->name);
+ bond->params.fail_over_mac = 1;
+ } else if (!bond->params.fail_over_mac) {
+ printk(KERN_ERR DRV_NAME
+ ": %s: Error: The slave device specified "
+ "does not support setting the MAC address, "
+ "but fail_over_mac is not enabled.\n"
+ , bond_dev->name);
+ res = -EOPNOTSUPP;
+ goto err_undo_flags;
+ }
}
new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
@@ -1340,16 +1404,18 @@
*/
memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
- /*
- * Set slave to master's mac address. The application already
- * set the master's mac address to that of the first slave
- */
- memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
- addr.sa_family = slave_dev->type;
- res = dev_set_mac_address(slave_dev, &addr);
- if (res) {
- dprintk("Error %d calling set_mac_address\n", res);
- goto err_free;
+ if (!bond->params.fail_over_mac) {
+ /*
+ * Set slave to master's mac address. The application already
+ * set the master's mac address to that of the first slave
+ */
+ memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
+ addr.sa_family = slave_dev->type;
+ res = dev_set_mac_address(slave_dev, &addr);
+ if (res) {
+ dprintk("Error %d calling set_mac_address\n", res);
+ goto err_free;
+ }
}
res = netdev_set_master(slave_dev, bond_dev);
@@ -1574,9 +1640,11 @@
dev_close(slave_dev);
err_restore_mac:
- memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
- addr.sa_family = slave_dev->type;
- dev_set_mac_address(slave_dev, &addr);
+ if (!bond->params.fail_over_mac) {
+ memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
+ addr.sa_family = slave_dev->type;
+ dev_set_mac_address(slave_dev, &addr);
+ }
err_free:
kfree(new_slave);
@@ -1749,10 +1817,12 @@
/* close slave before restoring its mac address */
dev_close(slave_dev);
- /* restore original ("permanent") mac address */
- memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
- addr.sa_family = slave_dev->type;
- dev_set_mac_address(slave_dev, &addr);
+ if (!bond->params.fail_over_mac) {
+ /* restore original ("permanent") mac address */
+ memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
+ addr.sa_family = slave_dev->type;
+ dev_set_mac_address(slave_dev, &addr);
+ }
slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
IFF_SLAVE_INACTIVE | IFF_BONDING |
@@ -1764,6 +1834,35 @@
}
/*
+* Destroy a bonding device.
+* Must be under rtnl_lock when this function is called.
+*/
+void bond_destroy(struct bonding *bond)
+{
+ bond_deinit(bond->dev);
+ bond_destroy_sysfs_entry(bond);
+ unregister_netdevice(bond->dev);
+}
+
+/*
+* First release a slave and than destroy the bond if no more slaves iare left.
+* Must be under rtnl_lock when this function is called.
+*/
+int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
+{
+ struct bonding *bond = bond_dev->priv;
+ int ret;
+
+ ret = bond_release(bond_dev, slave_dev);
+ if ((ret == 0) && (bond->slave_cnt == 0)) {
+ printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n",
+ bond_dev->name, bond_dev->name);
+ bond_destroy(bond);
+ }
+ return ret;
+}
+
+/*
* This function releases all slaves.
*/
static int bond_release_all(struct net_device *bond_dev)
@@ -1839,10 +1938,12 @@
/* close slave before restoring its mac address */
dev_close(slave_dev);
- /* restore original ("permanent") mac address*/
- memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
- addr.sa_family = slave_dev->type;
- dev_set_mac_address(slave_dev, &addr);
+ if (!bond->params.fail_over_mac) {
+ /* restore original ("permanent") mac address*/
+ memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
+ addr.sa_family = slave_dev->type;
+ dev_set_mac_address(slave_dev, &addr);
+ }
slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
IFF_SLAVE_INACTIVE);
@@ -2013,6 +2114,17 @@
* program could monitor the link itself if needed.
*/
+ if (bond->send_grat_arp) {
+ if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &bond->curr_active_slave->dev->state))
+ dprintk("Needs to send gratuitous arp but not yet\n");
+ else {
+ dprintk("sending delayed gratuitous arp on on %s\n",
+ bond->curr_active_slave->dev->name);
+ bond_send_gratuitous_arp(bond);
+ bond->send_grat_arp = 0;
+ }
+ }
read_lock(&bond->curr_slave_lock);
oldcurrent = bond->curr_active_slave;
read_unlock(&bond->curr_slave_lock);
@@ -2414,7 +2526,7 @@
if (bond->master_ip) {
bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
- bond->master_ip, 0);
+ bond->master_ip, 0);
}
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -2951,9 +3063,15 @@
curr = bond->curr_active_slave;
read_unlock(&bond->curr_slave_lock);
- seq_printf(seq, "Bonding Mode: %s\n",
+ seq_printf(seq, "Bonding Mode: %s",
bond_mode_name(bond->params.mode));
+ if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
+ bond->params.fail_over_mac)
+ seq_printf(seq, " (fail_over_mac)");
+
+ seq_printf(seq, "\n");
+
if (bond->params.mode == BOND_MODE_XOR ||
bond->params.mode == BOND_MODE_8023AD) {
seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
@@ -3248,6 +3366,11 @@
* ... Or is it this?
*/
break;
+ case NETDEV_GOING_DOWN:
+ dprintk("slave %s is going down\n", slave_dev->name);
+ if (bond->setup_by_slave)
+ bond_release_and_destroy(bond_dev, slave_dev);
+ break;
case NETDEV_CHANGEMTU:
/*
* TODO: Should slaves be allowed to
@@ -3880,6 +4003,13 @@
dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
+ /*
+ * If fail_over_mac is enabled, do nothing and return success.
+ * Returning an error causes ifenslave to fail.
+ */
+ if (bond->params.fail_over_mac)
+ return 0;
+
if (!is_valid_ether_addr(sa->sa_data)) {
return -EADDRNOTAVAIL;
}
@@ -4217,6 +4347,8 @@
bond->current_arp_slave = NULL;
bond->primary_slave = NULL;
bond->dev = bond_dev;
+ bond->send_grat_arp = 0;
+ bond->setup_by_slave = 0;
INIT_LIST_HEAD(&bond->vlan_list);
/* Initialize the device entry points */
@@ -4265,7 +4397,6 @@
#ifdef CONFIG_PROC_FS
bond_create_proc_entry(bond);
#endif
-
list_add_tail(&bond->bond_list, &bond_dev_list);
return 0;
@@ -4599,6 +4730,11 @@
primary = NULL;
}
+ if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP))
+ printk(KERN_WARNING DRV_NAME
+ ": Warning: fail_over_mac only affects "
+ "active-backup mode.\n");
+
/* fill params struct with the proper values */
params->mode = bond_mode;
params->xmit_policy = xmit_hashtype;
@@ -4610,6 +4746,7 @@
params->use_carrier = use_carrier;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
+ params->fail_over_mac = fail_over_mac;
if (primary) {
strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 6f49ca7..80c0c8c4 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -164,9 +164,7 @@
printk(KERN_INFO DRV_NAME
": %s is being deleted...\n",
bond->dev->name);
- bond_deinit(bond->dev);
- bond_destroy_sysfs_entry(bond);
- unregister_netdevice(bond->dev);
+ bond_destroy(bond);
rtnl_unlock();
goto out;
}
@@ -260,17 +258,16 @@
char command[IFNAMSIZ + 1] = { 0, };
char *ifname;
int i, res, found, ret = count;
+ u32 original_mtu;
struct slave *slave;
struct net_device *dev = NULL;
struct bonding *bond = to_bond(d);
/* Quick sanity check -- is the bond interface up? */
if (!(bond->dev->flags & IFF_UP)) {
- printk(KERN_ERR DRV_NAME
- ": %s: Unable to update slaves because interface is down.\n",
+ printk(KERN_WARNING DRV_NAME
+ ": %s: doing slave updates when interface is down.\n",
bond->dev->name);
- ret = -EPERM;
- goto out;
}
/* Note: We can't hold bond->lock here, as bond_create grabs it. */
@@ -327,6 +324,7 @@
}
/* Set the slave's MTU to match the bond */
+ original_mtu = dev->mtu;
if (dev->mtu != bond->dev->mtu) {
if (dev->change_mtu) {
res = dev->change_mtu(dev,
@@ -341,6 +339,9 @@
}
rtnl_lock();
res = bond_enslave(bond->dev, dev);
+ bond_for_each_slave(bond, slave, i)
+ if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0)
+ slave->original_mtu = original_mtu;
rtnl_unlock();
if (res) {
ret = res;
@@ -353,13 +354,17 @@
bond_for_each_slave(bond, slave, i)
if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
dev = slave->dev;
+ original_mtu = slave->original_mtu;
break;
}
if (dev) {
printk(KERN_INFO DRV_NAME ": %s: Removing slave %s\n",
bond->dev->name, dev->name);
rtnl_lock();
- res = bond_release(bond->dev, dev);
+ if (bond->setup_by_slave)
+ res = bond_release_and_destroy(bond->dev, dev);
+ else
+ res = bond_release(bond->dev, dev);
rtnl_unlock();
if (res) {
ret = res;
@@ -367,9 +372,9 @@
}
/* set the slave MTU to the default */
if (dev->change_mtu) {
- dev->change_mtu(dev, 1500);
+ dev->change_mtu(dev, original_mtu);
} else {
- dev->mtu = 1500;
+ dev->mtu = original_mtu;
}
}
else {
@@ -563,6 +568,54 @@
static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate);
/*
+ * Show and store fail_over_mac. User only allowed to change the
+ * value when there are no slaves.
+ */
+static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attribute *attr, char *buf)
+{
+ struct bonding *bond = to_bond(d);
+
+ return sprintf(buf, "%d\n", bond->params.fail_over_mac) + 1;
+}
+
+static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count)
+{
+ int new_value;
+ int ret = count;
+ struct bonding *bond = to_bond(d);
+
+ if (bond->slave_cnt != 0) {
+ printk(KERN_ERR DRV_NAME
+ ": %s: Can't alter fail_over_mac with slaves in bond.\n",
+ bond->dev->name);
+ ret = -EPERM;
+ goto out;
+ }
+
+ if (sscanf(buf, "%d", &new_value) != 1) {
+ printk(KERN_ERR DRV_NAME
+ ": %s: no fail_over_mac value specified.\n",
+ bond->dev->name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ((new_value == 0) || (new_value == 1)) {
+ bond->params.fail_over_mac = new_value;
+ printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %d.\n",
+ bond->dev->name, new_value);
+ } else {
+ printk(KERN_INFO DRV_NAME
+ ": %s: Ignoring invalid fail_over_mac value %d.\n",
+ bond->dev->name, new_value);
+ }
+out:
+ return ret;
+}
+
+static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac);
+
+/*
* Show and set the arp timer interval. There are two tricky bits
* here. First, if ARP monitoring is activated, then we must disable
* MII monitoring. Second, if the ARP timer isn't running, we must
@@ -1383,6 +1436,7 @@
static struct attribute *per_bond_attrs[] = {
&dev_attr_slaves.attr,
&dev_attr_mode.attr,
+ &dev_attr_fail_over_mac.attr,
&dev_attr_arp_validate.attr,
&dev_attr_arp_interval.attr,
&dev_attr_arp_ip_target.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 2a6af7d..a8bbd56 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -22,8 +22,8 @@
#include "bond_3ad.h"
#include "bond_alb.h"
-#define DRV_VERSION "3.1.3"
-#define DRV_RELDATE "June 13, 2007"
+#define DRV_VERSION "3.2.0"
+#define DRV_RELDATE "September 13, 2007"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
@@ -128,6 +128,7 @@
int arp_interval;
int arp_validate;
int use_carrier;
+ int fail_over_mac;
int updelay;
int downdelay;
int lacp_fast;
@@ -156,6 +157,7 @@
s8 link; /* one of BOND_LINK_XXXX */
s8 state; /* one of BOND_STATE_XXXX */
u32 original_flags;
+ u32 original_mtu;
u32 link_failure_count;
u16 speed;
u8 duplex;
@@ -185,6 +187,8 @@
struct timer_list mii_timer;
struct timer_list arp_timer;
s8 kill_timers;
+ s8 send_grat_arp;
+ s8 setup_by_slave;
struct net_device_stats stats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_entry;
@@ -292,6 +296,8 @@
struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
int bond_create(char *name, struct bond_params *params, struct bonding **newbond);
+void bond_destroy(struct bonding *bond);
+int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev);
void bond_deinit(struct net_device *bond_dev);
int bond_create_sysfs(void);
void bond_destroy_sysfs(void);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 563bf5f..7df31b5 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -4443,7 +4443,7 @@
{REG_MAC_COLL_EXCESS},
{REG_MAC_COLL_LATE}
};
-#define CAS_REG_LEN (sizeof(ethtool_register_table)/sizeof(int))
+#define CAS_REG_LEN ARRAY_SIZE(ethtool_register_table)
#define CAS_MAX_REGS (sizeof (u32)*CAS_REG_LEN)
static void cas_read_regs(struct cas *cp, u8 *ptr, int len)
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
new file mode 100644
index 0000000..ed53aaa
--- /dev/null
+++ b/drivers/net/cpmac.c
@@ -0,0 +1,1174 @@
+/*
+ * Copyright (C) 2006, 2007 Eugene Konev
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <asm/gpio.h>
+
+MODULE_AUTHOR("Eugene Konev <ejka@imfi.kspu.ru>");
+MODULE_DESCRIPTION("TI AR7 ethernet driver (CPMAC)");
+MODULE_LICENSE("GPL");
+
+static int debug_level = 8;
+static int dumb_switch;
+
+/* Next 2 are only used in cpmac_probe, so it's pointless to change them */
+module_param(debug_level, int, 0444);
+module_param(dumb_switch, int, 0444);
+
+MODULE_PARM_DESC(debug_level, "Number of NETIF_MSG bits to enable");
+MODULE_PARM_DESC(dumb_switch, "Assume switch is not connected to MDIO bus");
+
+#define CPMAC_VERSION "0.5.0"
+/* stolen from net/ieee80211.h */
+#ifndef MAC_FMT
+#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define MAC_ARG(x) ((u8*)(x))[0], ((u8*)(x))[1], ((u8*)(x))[2], \
+ ((u8*)(x))[3], ((u8*)(x))[4], ((u8*)(x))[5]
+#endif
+/* frame size + 802.1q tag */
+#define CPMAC_SKB_SIZE (ETH_FRAME_LEN + 4)
+#define CPMAC_QUEUES 8
+
+/* Ethernet registers */
+#define CPMAC_TX_CONTROL 0x0004
+#define CPMAC_TX_TEARDOWN 0x0008
+#define CPMAC_RX_CONTROL 0x0014
+#define CPMAC_RX_TEARDOWN 0x0018
+#define CPMAC_MBP 0x0100
+# define MBP_RXPASSCRC 0x40000000
+# define MBP_RXQOS 0x20000000
+# define MBP_RXNOCHAIN 0x10000000
+# define MBP_RXCMF 0x01000000
+# define MBP_RXSHORT 0x00800000
+# define MBP_RXCEF 0x00400000
+# define MBP_RXPROMISC 0x00200000
+# define MBP_PROMISCCHAN(channel) (((channel) & 0x7) << 16)
+# define MBP_RXBCAST 0x00002000
+# define MBP_BCASTCHAN(channel) (((channel) & 0x7) << 8)
+# define MBP_RXMCAST 0x00000020
+# define MBP_MCASTCHAN(channel) ((channel) & 0x7)
+#define CPMAC_UNICAST_ENABLE 0x0104
+#define CPMAC_UNICAST_CLEAR 0x0108
+#define CPMAC_MAX_LENGTH 0x010c
+#define CPMAC_BUFFER_OFFSET 0x0110
+#define CPMAC_MAC_CONTROL 0x0160
+# define MAC_TXPTYPE 0x00000200
+# define MAC_TXPACE 0x00000040
+# define MAC_MII 0x00000020
+# define MAC_TXFLOW 0x00000010
+# define MAC_RXFLOW 0x00000008
+# define MAC_MTEST 0x00000004
+# define MAC_LOOPBACK 0x00000002
+# define MAC_FDX 0x00000001
+#define CPMAC_MAC_STATUS 0x0164
+# define MAC_STATUS_QOS 0x00000004
+# define MAC_STATUS_RXFLOW 0x00000002
+# define MAC_STATUS_TXFLOW 0x00000001
+#define CPMAC_TX_INT_ENABLE 0x0178
+#define CPMAC_TX_INT_CLEAR 0x017c
+#define CPMAC_MAC_INT_VECTOR 0x0180
+# define MAC_INT_STATUS 0x00080000
+# define MAC_INT_HOST 0x00040000
+# define MAC_INT_RX 0x00020000
+# define MAC_INT_TX 0x00010000
+#define CPMAC_MAC_EOI_VECTOR 0x0184
+#define CPMAC_RX_INT_ENABLE 0x0198
+#define CPMAC_RX_INT_CLEAR 0x019c
+#define CPMAC_MAC_INT_ENABLE 0x01a8
+#define CPMAC_MAC_INT_CLEAR 0x01ac
+#define CPMAC_MAC_ADDR_LO(channel) (0x01b0 + (channel) * 4)
+#define CPMAC_MAC_ADDR_MID 0x01d0
+#define CPMAC_MAC_ADDR_HI 0x01d4
+#define CPMAC_MAC_HASH_LO 0x01d8
+#define CPMAC_MAC_HASH_HI 0x01dc
+#define CPMAC_TX_PTR(channel) (0x0600 + (channel) * 4)
+#define CPMAC_RX_PTR(channel) (0x0620 + (channel) * 4)
+#define CPMAC_TX_ACK(channel) (0x0640 + (channel) * 4)
+#define CPMAC_RX_ACK(channel) (0x0660 + (channel) * 4)
+#define CPMAC_REG_END 0x0680
+/*
+ * Rx/Tx statistics
+ * TODO: use some of them to fill stats in cpmac_stats()
+ */
+#define CPMAC_STATS_RX_GOOD 0x0200
+#define CPMAC_STATS_RX_BCAST 0x0204
+#define CPMAC_STATS_RX_MCAST 0x0208
+#define CPMAC_STATS_RX_PAUSE 0x020c
+#define CPMAC_STATS_RX_CRC 0x0210
+#define CPMAC_STATS_RX_ALIGN 0x0214
+#define CPMAC_STATS_RX_OVER 0x0218
+#define CPMAC_STATS_RX_JABBER 0x021c
+#define CPMAC_STATS_RX_UNDER 0x0220
+#define CPMAC_STATS_RX_FRAG 0x0224
+#define CPMAC_STATS_RX_FILTER 0x0228
+#define CPMAC_STATS_RX_QOSFILTER 0x022c
+#define CPMAC_STATS_RX_OCTETS 0x0230
+
+#define CPMAC_STATS_TX_GOOD 0x0234
+#define CPMAC_STATS_TX_BCAST 0x0238
+#define CPMAC_STATS_TX_MCAST 0x023c
+#define CPMAC_STATS_TX_PAUSE 0x0240
+#define CPMAC_STATS_TX_DEFER 0x0244
+#define CPMAC_STATS_TX_COLLISION 0x0248
+#define CPMAC_STATS_TX_SINGLECOLL 0x024c
+#define CPMAC_STATS_TX_MULTICOLL 0x0250
+#define CPMAC_STATS_TX_EXCESSCOLL 0x0254
+#define CPMAC_STATS_TX_LATECOLL 0x0258
+#define CPMAC_STATS_TX_UNDERRUN 0x025c
+#define CPMAC_STATS_TX_CARRIERSENSE 0x0260
+#define CPMAC_STATS_TX_OCTETS 0x0264
+
+#define cpmac_read(base, reg) (readl((void __iomem *)(base) + (reg)))
+#define cpmac_write(base, reg, val) (writel(val, (void __iomem *)(base) + \
+ (reg)))
+
+/* MDIO bus */
+#define CPMAC_MDIO_VERSION 0x0000
+#define CPMAC_MDIO_CONTROL 0x0004
+# define MDIOC_IDLE 0x80000000
+# define MDIOC_ENABLE 0x40000000
+# define MDIOC_PREAMBLE 0x00100000
+# define MDIOC_FAULT 0x00080000
+# define MDIOC_FAULTDETECT 0x00040000
+# define MDIOC_INTTEST 0x00020000
+# define MDIOC_CLKDIV(div) ((div) & 0xff)
+#define CPMAC_MDIO_ALIVE 0x0008
+#define CPMAC_MDIO_LINK 0x000c
+#define CPMAC_MDIO_ACCESS(channel) (0x0080 + (channel) * 8)
+# define MDIO_BUSY 0x80000000
+# define MDIO_WRITE 0x40000000
+# define MDIO_REG(reg) (((reg) & 0x1f) << 21)
+# define MDIO_PHY(phy) (((phy) & 0x1f) << 16)
+# define MDIO_DATA(data) ((data) & 0xffff)
+#define CPMAC_MDIO_PHYSEL(channel) (0x0084 + (channel) * 8)
+# define PHYSEL_LINKSEL 0x00000040
+# define PHYSEL_LINKINT 0x00000020
+
+struct cpmac_desc {
+ u32 hw_next;
+ u32 hw_data;
+ u16 buflen;
+ u16 bufflags;
+ u16 datalen;
+ u16 dataflags;
+#define CPMAC_SOP 0x8000
+#define CPMAC_EOP 0x4000
+#define CPMAC_OWN 0x2000
+#define CPMAC_EOQ 0x1000
+ struct sk_buff *skb;
+ struct cpmac_desc *next;
+ dma_addr_t mapping;
+ dma_addr_t data_mapping;
+};
+
+struct cpmac_priv {
+ spinlock_t lock;
+ spinlock_t rx_lock;
+ struct cpmac_desc *rx_head;
+ int ring_size;
+ struct cpmac_desc *desc_ring;
+ dma_addr_t dma_ring;
+ void __iomem *regs;
+ struct mii_bus *mii_bus;
+ struct phy_device *phy;
+ char phy_name[BUS_ID_SIZE];
+ int oldlink, oldspeed, oldduplex;
+ u32 msg_enable;
+ struct net_device *dev;
+ struct work_struct reset_work;
+ struct platform_device *pdev;
+};
+
+static irqreturn_t cpmac_irq(int, void *);
+static void cpmac_hw_start(struct net_device *dev);
+static void cpmac_hw_stop(struct net_device *dev);
+static int cpmac_stop(struct net_device *dev);
+static int cpmac_open(struct net_device *dev);
+
+static void cpmac_dump_regs(struct net_device *dev)
+{
+ int i;
+ struct cpmac_priv *priv = netdev_priv(dev);
+ for (i = 0; i < CPMAC_REG_END; i += 4) {
+ if (i % 16 == 0) {
+ if (i)
+ printk("\n");
+ printk(KERN_DEBUG "%s: reg[%p]:", dev->name,
+ priv->regs + i);
+ }
+ printk(" %08x", cpmac_read(priv->regs, i));
+ }
+ printk("\n");
+}
+
+static void cpmac_dump_desc(struct net_device *dev, struct cpmac_desc *desc)
+{
+ int i;
+ printk(KERN_DEBUG "%s: desc[%p]:", dev->name, desc);
+ for (i = 0; i < sizeof(*desc) / 4; i++)
+ printk(" %08x", ((u32 *)desc)[i]);
+ printk("\n");
+}
+
+static void cpmac_dump_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ int i;
+ printk(KERN_DEBUG "%s: skb 0x%p, len=%d\n", dev->name, skb, skb->len);
+ for (i = 0; i < skb->len; i++) {
+ if (i % 16 == 0) {
+ if (i)
+ printk("\n");
+ printk(KERN_DEBUG "%s: data[%p]:", dev->name,
+ skb->data + i);
+ }
+ printk(" %02x", ((u8 *)skb->data)[i]);
+ }
+ printk("\n");
+}
+
+static int cpmac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+ u32 val;
+
+ while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY)
+ cpu_relax();
+ cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_REG(reg) |
+ MDIO_PHY(phy_id));
+ while ((val = cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0))) & MDIO_BUSY)
+ cpu_relax();
+ return MDIO_DATA(val);
+}
+
+static int cpmac_mdio_write(struct mii_bus *bus, int phy_id,
+ int reg, u16 val)
+{
+ while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY)
+ cpu_relax();
+ cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_WRITE |
+ MDIO_REG(reg) | MDIO_PHY(phy_id) | MDIO_DATA(val));
+ return 0;
+}
+
+static int cpmac_mdio_reset(struct mii_bus *bus)
+{
+ ar7_device_reset(AR7_RESET_BIT_MDIO);
+ cpmac_write(bus->priv, CPMAC_MDIO_CONTROL, MDIOC_ENABLE |
+ MDIOC_CLKDIV(ar7_cpmac_freq() / 2200000 - 1));
+ return 0;
+}
+
+static int mii_irqs[PHY_MAX_ADDR] = { PHY_POLL, };
+
+static struct mii_bus cpmac_mii = {
+ .name = "cpmac-mii",
+ .read = cpmac_mdio_read,
+ .write = cpmac_mdio_write,
+ .reset = cpmac_mdio_reset,
+ .irq = mii_irqs,
+};
+
+static int cpmac_config(struct net_device *dev, struct ifmap *map)
+{
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+
+ /* Don't allow changing the I/O address */
+ if (map->base_addr != dev->base_addr)
+ return -EOPNOTSUPP;
+
+ /* ignore other fields */
+ return 0;
+}
+
+static void cpmac_set_multicast_list(struct net_device *dev)
+{
+ struct dev_mc_list *iter;
+ int i;
+ u8 tmp;
+ u32 mbp, bit, hash[2] = { 0, };
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ mbp = cpmac_read(priv->regs, CPMAC_MBP);
+ if (dev->flags & IFF_PROMISC) {
+ cpmac_write(priv->regs, CPMAC_MBP, (mbp & ~MBP_PROMISCCHAN(0)) |
+ MBP_RXPROMISC);
+ } else {
+ cpmac_write(priv->regs, CPMAC_MBP, mbp & ~MBP_RXPROMISC);
+ if (dev->flags & IFF_ALLMULTI) {
+ /* enable all multicast mode */
+ cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, 0xffffffff);
+ cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, 0xffffffff);
+ } else {
+ /*
+ * cpmac uses some strange mac address hashing
+ * (not crc32)
+ */
+ for (i = 0, iter = dev->mc_list; i < dev->mc_count;
+ i++, iter = iter->next) {
+ bit = 0;
+ tmp = iter->dmi_addr[0];
+ bit ^= (tmp >> 2) ^ (tmp << 4);
+ tmp = iter->dmi_addr[1];
+ bit ^= (tmp >> 4) ^ (tmp << 2);
+ tmp = iter->dmi_addr[2];
+ bit ^= (tmp >> 6) ^ tmp;
+ tmp = iter->dmi_addr[3];
+ bit ^= (tmp >> 2) ^ (tmp << 4);
+ tmp = iter->dmi_addr[4];
+ bit ^= (tmp >> 4) ^ (tmp << 2);
+ tmp = iter->dmi_addr[5];
+ bit ^= (tmp >> 6) ^ tmp;
+ bit &= 0x3f;
+ hash[bit / 32] |= 1 << (bit % 32);
+ }
+
+ cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, hash[0]);
+ cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, hash[1]);
+ }
+ }
+}
+
+static struct sk_buff *cpmac_rx_one(struct net_device *dev,
+ struct cpmac_priv *priv,
+ struct cpmac_desc *desc)
+{
+ struct sk_buff *skb, *result = NULL;
+
+ if (unlikely(netif_msg_hw(priv)))
+ cpmac_dump_desc(dev, desc);
+ cpmac_write(priv->regs, CPMAC_RX_ACK(0), (u32)desc->mapping);
+ if (unlikely(!desc->datalen)) {
+ if (netif_msg_rx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING "%s: rx: spurious interrupt\n",
+ dev->name);
+ return NULL;
+ }
+
+ skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE);
+ if (likely(skb)) {
+ skb_reserve(skb, 2);
+ skb_put(desc->skb, desc->datalen);
+ desc->skb->protocol = eth_type_trans(desc->skb, dev);
+ desc->skb->ip_summed = CHECKSUM_NONE;
+ dev->stats.rx_packets++;
+ dev->stats.rx_bytes += desc->datalen;
+ result = desc->skb;
+ dma_unmap_single(&dev->dev, desc->data_mapping, CPMAC_SKB_SIZE,
+ DMA_FROM_DEVICE);
+ desc->skb = skb;
+ desc->data_mapping = dma_map_single(&dev->dev, skb->data,
+ CPMAC_SKB_SIZE,
+ DMA_FROM_DEVICE);
+ desc->hw_data = (u32)desc->data_mapping;
+ if (unlikely(netif_msg_pktdata(priv))) {
+ printk(KERN_DEBUG "%s: received packet:\n", dev->name);
+ cpmac_dump_skb(dev, result);
+ }
+ } else {
+ if (netif_msg_rx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING
+ "%s: low on skbs, dropping packet\n", dev->name);
+ dev->stats.rx_dropped++;
+ }
+
+ desc->buflen = CPMAC_SKB_SIZE;
+ desc->dataflags = CPMAC_OWN;
+
+ return result;
+}
+
+static int cpmac_poll(struct net_device *dev, int *budget)
+{
+ struct sk_buff *skb;
+ struct cpmac_desc *desc;
+ int received = 0, quota = min(dev->quota, *budget);
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ spin_lock(&priv->rx_lock);
+ if (unlikely(!priv->rx_head)) {
+ if (netif_msg_rx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING "%s: rx: polling, but no queue\n",
+ dev->name);
+ netif_rx_complete(dev);
+ return 0;
+ }
+
+ desc = priv->rx_head;
+ while ((received < quota) && ((desc->dataflags & CPMAC_OWN) == 0)) {
+ skb = cpmac_rx_one(dev, priv, desc);
+ if (likely(skb)) {
+ netif_receive_skb(skb);
+ received++;
+ }
+ desc = desc->next;
+ }
+
+ priv->rx_head = desc;
+ spin_unlock(&priv->rx_lock);
+ *budget -= received;
+ dev->quota -= received;
+ if (unlikely(netif_msg_rx_status(priv)))
+ printk(KERN_DEBUG "%s: poll processed %d packets\n", dev->name,
+ received);
+ if (desc->dataflags & CPMAC_OWN) {
+ netif_rx_complete(dev);
+ cpmac_write(priv->regs, CPMAC_RX_PTR(0), (u32)desc->mapping);
+ cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ int queue, len;
+ struct cpmac_desc *desc;
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ if (unlikely(skb_padto(skb, ETH_ZLEN))) {
+ if (netif_msg_tx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING
+ "%s: tx: padding failed, dropping\n", dev->name);
+ spin_lock(&priv->lock);
+ dev->stats.tx_dropped++;
+ spin_unlock(&priv->lock);
+ return -ENOMEM;
+ }
+
+ len = max(skb->len, ETH_ZLEN);
+ queue = skb->queue_mapping;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ netif_stop_subqueue(dev, queue);
+#else
+ netif_stop_queue(dev);
+#endif
+
+ desc = &priv->desc_ring[queue];
+ if (unlikely(desc->dataflags & CPMAC_OWN)) {
+ if (netif_msg_tx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING "%s: tx dma ring full, dropping\n",
+ dev->name);
+ spin_lock(&priv->lock);
+ dev->stats.tx_dropped++;
+ spin_unlock(&priv->lock);
+ dev_kfree_skb_any(skb);
+ return -ENOMEM;
+ }
+
+ spin_lock(&priv->lock);
+ dev->trans_start = jiffies;
+ spin_unlock(&priv->lock);
+ desc->dataflags = CPMAC_SOP | CPMAC_EOP | CPMAC_OWN;
+ desc->skb = skb;
+ desc->data_mapping = dma_map_single(&dev->dev, skb->data, len,
+ DMA_TO_DEVICE);
+ desc->hw_data = (u32)desc->data_mapping;
+ desc->datalen = len;
+ desc->buflen = len;
+ if (unlikely(netif_msg_tx_queued(priv)))
+ printk(KERN_DEBUG "%s: sending 0x%p, len=%d\n", dev->name, skb,
+ skb->len);
+ if (unlikely(netif_msg_hw(priv)))
+ cpmac_dump_desc(dev, desc);
+ if (unlikely(netif_msg_pktdata(priv)))
+ cpmac_dump_skb(dev, skb);
+ cpmac_write(priv->regs, CPMAC_TX_PTR(queue), (u32)desc->mapping);
+
+ return 0;
+}
+
+static void cpmac_end_xmit(struct net_device *dev, int queue)
+{
+ struct cpmac_desc *desc;
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ desc = &priv->desc_ring[queue];
+ cpmac_write(priv->regs, CPMAC_TX_ACK(queue), (u32)desc->mapping);
+ if (likely(desc->skb)) {
+ spin_lock(&priv->lock);
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += desc->skb->len;
+ spin_unlock(&priv->lock);
+ dma_unmap_single(&dev->dev, desc->data_mapping, desc->skb->len,
+ DMA_TO_DEVICE);
+
+ if (unlikely(netif_msg_tx_done(priv)))
+ printk(KERN_DEBUG "%s: sent 0x%p, len=%d\n", dev->name,
+ desc->skb, desc->skb->len);
+
+ dev_kfree_skb_irq(desc->skb);
+ desc->skb = NULL;
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ if (netif_subqueue_stopped(dev, queue))
+ netif_wake_subqueue(dev, queue);
+#else
+ if (netif_queue_stopped(dev))
+ netif_wake_queue(dev);
+#endif
+ } else {
+ if (netif_msg_tx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING
+ "%s: end_xmit: spurious interrupt\n", dev->name);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ if (netif_subqueue_stopped(dev, queue))
+ netif_wake_subqueue(dev, queue);
+#else
+ if (netif_queue_stopped(dev))
+ netif_wake_queue(dev);
+#endif
+ }
+}
+
+static void cpmac_hw_stop(struct net_device *dev)
+{
+ int i;
+ struct cpmac_priv *priv = netdev_priv(dev);
+ struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data;
+
+ ar7_device_reset(pdata->reset_bit);
+ cpmac_write(priv->regs, CPMAC_RX_CONTROL,
+ cpmac_read(priv->regs, CPMAC_RX_CONTROL) & ~1);
+ cpmac_write(priv->regs, CPMAC_TX_CONTROL,
+ cpmac_read(priv->regs, CPMAC_TX_CONTROL) & ~1);
+ for (i = 0; i < 8; i++) {
+ cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0);
+ cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0);
+ }
+ cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_MAC_CONTROL,
+ cpmac_read(priv->regs, CPMAC_MAC_CONTROL) & ~MAC_MII);
+}
+
+static void cpmac_hw_start(struct net_device *dev)
+{
+ int i;
+ struct cpmac_priv *priv = netdev_priv(dev);
+ struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data;
+
+ ar7_device_reset(pdata->reset_bit);
+ for (i = 0; i < 8; i++) {
+ cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0);
+ cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0);
+ }
+ cpmac_write(priv->regs, CPMAC_RX_PTR(0), priv->rx_head->mapping);
+
+ cpmac_write(priv->regs, CPMAC_MBP, MBP_RXSHORT | MBP_RXBCAST |
+ MBP_RXMCAST);
+ cpmac_write(priv->regs, CPMAC_BUFFER_OFFSET, 0);
+ for (i = 0; i < 8; i++)
+ cpmac_write(priv->regs, CPMAC_MAC_ADDR_LO(i), dev->dev_addr[5]);
+ cpmac_write(priv->regs, CPMAC_MAC_ADDR_MID, dev->dev_addr[4]);
+ cpmac_write(priv->regs, CPMAC_MAC_ADDR_HI, dev->dev_addr[0] |
+ (dev->dev_addr[1] << 8) | (dev->dev_addr[2] << 16) |
+ (dev->dev_addr[3] << 24));
+ cpmac_write(priv->regs, CPMAC_MAX_LENGTH, CPMAC_SKB_SIZE);
+ cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff);
+ cpmac_write(priv->regs, CPMAC_UNICAST_ENABLE, 1);
+ cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1);
+ cpmac_write(priv->regs, CPMAC_TX_INT_ENABLE, 0xff);
+ cpmac_write(priv->regs, CPMAC_MAC_INT_ENABLE, 3);
+
+ cpmac_write(priv->regs, CPMAC_RX_CONTROL,
+ cpmac_read(priv->regs, CPMAC_RX_CONTROL) | 1);
+ cpmac_write(priv->regs, CPMAC_TX_CONTROL,
+ cpmac_read(priv->regs, CPMAC_TX_CONTROL) | 1);
+ cpmac_write(priv->regs, CPMAC_MAC_CONTROL,
+ cpmac_read(priv->regs, CPMAC_MAC_CONTROL) | MAC_MII |
+ MAC_FDX);
+}
+
+static void cpmac_clear_rx(struct net_device *dev)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+ struct cpmac_desc *desc;
+ int i;
+ if (unlikely(!priv->rx_head))
+ return;
+ desc = priv->rx_head;
+ for (i = 0; i < priv->ring_size; i++) {
+ if ((desc->dataflags & CPMAC_OWN) == 0) {
+ if (netif_msg_rx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING "%s: packet dropped\n",
+ dev->name);
+ if (unlikely(netif_msg_hw(priv)))
+ cpmac_dump_desc(dev, desc);
+ desc->dataflags = CPMAC_OWN;
+ dev->stats.rx_dropped++;
+ }
+ desc = desc->next;
+ }
+}
+
+static void cpmac_clear_tx(struct net_device *dev)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+ int i;
+ if (unlikely(!priv->desc_ring))
+ return;
+ for (i = 0; i < CPMAC_QUEUES; i++)
+ if (priv->desc_ring[i].skb) {
+ dev_kfree_skb_any(priv->desc_ring[i].skb);
+ if (netif_subqueue_stopped(dev, i))
+ netif_wake_subqueue(dev, i);
+ }
+}
+
+static void cpmac_hw_error(struct work_struct *work)
+{
+ struct cpmac_priv *priv =
+ container_of(work, struct cpmac_priv, reset_work);
+
+ spin_lock(&priv->rx_lock);
+ cpmac_clear_rx(priv->dev);
+ spin_unlock(&priv->rx_lock);
+ cpmac_clear_tx(priv->dev);
+ cpmac_hw_start(priv->dev);
+ netif_start_queue(priv->dev);
+}
+
+static irqreturn_t cpmac_irq(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct cpmac_priv *priv;
+ int queue;
+ u32 status;
+
+ if (!dev)
+ return IRQ_NONE;
+
+ priv = netdev_priv(dev);
+
+ status = cpmac_read(priv->regs, CPMAC_MAC_INT_VECTOR);
+
+ if (unlikely(netif_msg_intr(priv)))
+ printk(KERN_DEBUG "%s: interrupt status: 0x%08x\n", dev->name,
+ status);
+
+ if (status & MAC_INT_TX)
+ cpmac_end_xmit(dev, (status & 7));
+
+ if (status & MAC_INT_RX) {
+ queue = (status >> 8) & 7;
+ netif_rx_schedule(dev);
+ cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 1 << queue);
+ }
+
+ cpmac_write(priv->regs, CPMAC_MAC_EOI_VECTOR, 0);
+
+ if (unlikely(status & (MAC_INT_HOST | MAC_INT_STATUS))) {
+ if (netif_msg_drv(priv) && net_ratelimit())
+ printk(KERN_ERR "%s: hw error, resetting...\n",
+ dev->name);
+ netif_stop_queue(dev);
+ cpmac_hw_stop(dev);
+ schedule_work(&priv->reset_work);
+ if (unlikely(netif_msg_hw(priv)))
+ cpmac_dump_regs(dev);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void cpmac_tx_timeout(struct net_device *dev)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+ int i;
+
+ spin_lock(&priv->lock);
+ dev->stats.tx_errors++;
+ spin_unlock(&priv->lock);
+ if (netif_msg_tx_err(priv) && net_ratelimit())
+ printk(KERN_WARNING "%s: transmit timeout\n", dev->name);
+ /*
+ * FIXME: waking up random queue is not the best thing to
+ * do... on the other hand why we got here at all?
+ */
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ for (i = 0; i < CPMAC_QUEUES; i++)
+ if (priv->desc_ring[i].skb) {
+ dev_kfree_skb_any(priv->desc_ring[i].skb);
+ netif_wake_subqueue(dev, i);
+ break;
+ }
+#else
+ if (priv->desc_ring[0].skb)
+ dev_kfree_skb_any(priv->desc_ring[0].skb);
+ netif_wake_queue(dev);
+#endif
+}
+
+static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+ if (!(netif_running(dev)))
+ return -EINVAL;
+ if (!priv->phy)
+ return -EINVAL;
+ if ((cmd == SIOCGMIIPHY) || (cmd == SIOCGMIIREG) ||
+ (cmd == SIOCSMIIREG))
+ return phy_mii_ioctl(priv->phy, if_mii(ifr), cmd);
+
+ return -EOPNOTSUPP;
+}
+
+static int cpmac_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ if (priv->phy)
+ return phy_ethtool_gset(priv->phy, cmd);
+
+ return -EINVAL;
+}
+
+static int cpmac_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (priv->phy)
+ return phy_ethtool_sset(priv->phy, cmd);
+
+ return -EINVAL;
+}
+
+static void cpmac_get_ringparam(struct net_device *dev, struct ethtool_ringparam* ring)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ ring->rx_max_pending = 1024;
+ ring->rx_mini_max_pending = 1;
+ ring->rx_jumbo_max_pending = 1;
+ ring->tx_max_pending = 1;
+
+ ring->rx_pending = priv->ring_size;
+ ring->rx_mini_pending = 1;
+ ring->rx_jumbo_pending = 1;
+ ring->tx_pending = 1;
+}
+
+static int cpmac_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ring)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+
+ if (dev->flags && IFF_UP)
+ return -EBUSY;
+ priv->ring_size = ring->rx_pending;
+ return 0;
+}
+
+static void cpmac_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strcpy(info->driver, "cpmac");
+ strcpy(info->version, CPMAC_VERSION);
+ info->fw_version[0] = '\0';
+ sprintf(info->bus_info, "%s", "cpmac");
+ info->regdump_len = 0;
+}
+
+static const struct ethtool_ops cpmac_ethtool_ops = {
+ .get_settings = cpmac_get_settings,
+ .set_settings = cpmac_set_settings,
+ .get_drvinfo = cpmac_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = cpmac_get_ringparam,
+ .set_ringparam = cpmac_set_ringparam,
+};
+
+static void cpmac_adjust_link(struct net_device *dev)
+{
+ struct cpmac_priv *priv = netdev_priv(dev);
+ int new_state = 0;
+
+ spin_lock(&priv->lock);
+ if (priv->phy->link) {
+ netif_start_queue(dev);
+ if (priv->phy->duplex != priv->oldduplex) {
+ new_state = 1;
+ priv->oldduplex = priv->phy->duplex;
+ }
+
+ if (priv->phy->speed != priv->oldspeed) {
+ new_state = 1;
+ priv->oldspeed = priv->phy->speed;
+ }
+
+ if (!priv->oldlink) {
+ new_state = 1;
+ priv->oldlink = 1;
+ netif_schedule(dev);
+ }
+ } else if (priv->oldlink) {
+ netif_stop_queue(dev);
+ new_state = 1;
+ priv->oldlink = 0;
+ priv->oldspeed = 0;
+ priv->oldduplex = -1;
+ }
+
+ if (new_state && netif_msg_link(priv) && net_ratelimit())
+ phy_print_status(priv->phy);
+
+ spin_unlock(&priv->lock);
+}
+
+static int cpmac_open(struct net_device *dev)
+{
+ int i, size, res;
+ struct cpmac_priv *priv = netdev_priv(dev);
+ struct resource *mem;
+ struct cpmac_desc *desc;
+ struct sk_buff *skb;
+
+ priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link,
+ 0, PHY_INTERFACE_MODE_MII);
+ if (IS_ERR(priv->phy)) {
+ if (netif_msg_drv(priv))
+ printk(KERN_ERR "%s: Could not attach to PHY\n",
+ dev->name);
+ return PTR_ERR(priv->phy);
+ }
+
+ mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs");
+ if (!request_mem_region(mem->start, mem->end - mem->start, dev->name)) {
+ if (netif_msg_drv(priv))
+ printk(KERN_ERR "%s: failed to request registers\n",
+ dev->name);
+ res = -ENXIO;
+ goto fail_reserve;
+ }
+
+ priv->regs = ioremap(mem->start, mem->end - mem->start);
+ if (!priv->regs) {
+ if (netif_msg_drv(priv))
+ printk(KERN_ERR "%s: failed to remap registers\n",
+ dev->name);
+ res = -ENXIO;
+ goto fail_remap;
+ }
+
+ size = priv->ring_size + CPMAC_QUEUES;
+ priv->desc_ring = dma_alloc_coherent(&dev->dev,
+ sizeof(struct cpmac_desc) * size,
+ &priv->dma_ring,
+ GFP_KERNEL);
+ if (!priv->desc_ring) {
+ res = -ENOMEM;
+ goto fail_alloc;
+ }
+
+ for (i = 0; i < size; i++)
+ priv->desc_ring[i].mapping = priv->dma_ring + sizeof(*desc) * i;
+
+ priv->rx_head = &priv->desc_ring[CPMAC_QUEUES];
+ for (i = 0, desc = priv->rx_head; i < priv->ring_size; i++, desc++) {
+ skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE);
+ if (unlikely(!skb)) {
+ res = -ENOMEM;
+ goto fail_desc;
+ }
+ skb_reserve(skb, 2);
+ desc->skb = skb;
+ desc->data_mapping = dma_map_single(&dev->dev, skb->data,
+ CPMAC_SKB_SIZE,
+ DMA_FROM_DEVICE);
+ desc->hw_data = (u32)desc->data_mapping;
+ desc->buflen = CPMAC_SKB_SIZE;
+ desc->dataflags = CPMAC_OWN;
+ desc->next = &priv->rx_head[(i + 1) % priv->ring_size];
+ desc->hw_next = (u32)desc->next->mapping;
+ }
+
+ if ((res = request_irq(dev->irq, cpmac_irq, IRQF_SHARED,
+ dev->name, dev))) {
+ if (netif_msg_drv(priv))
+ printk(KERN_ERR "%s: failed to obtain irq\n",
+ dev->name);
+ goto fail_irq;
+ }
+
+ INIT_WORK(&priv->reset_work, cpmac_hw_error);
+ cpmac_hw_start(dev);
+
+ priv->phy->state = PHY_CHANGELINK;
+ phy_start(priv->phy);
+
+ return 0;
+
+fail_irq:
+fail_desc:
+ for (i = 0; i < priv->ring_size; i++) {
+ if (priv->rx_head[i].skb) {
+ dma_unmap_single(&dev->dev,
+ priv->rx_head[i].data_mapping,
+ CPMAC_SKB_SIZE,
+ DMA_FROM_DEVICE);
+ kfree_skb(priv->rx_head[i].skb);
+ }
+ }
+fail_alloc:
+ kfree(priv->desc_ring);
+ iounmap(priv->regs);
+
+fail_remap:
+ release_mem_region(mem->start, mem->end - mem->start);
+
+fail_reserve:
+ phy_disconnect(priv->phy);
+
+ return res;
+}
+
+static int cpmac_stop(struct net_device *dev)
+{
+ int i;
+ struct cpmac_priv *priv = netdev_priv(dev);
+ struct resource *mem;
+
+ netif_stop_queue(dev);
+
+ cancel_work_sync(&priv->reset_work);
+ phy_stop(priv->phy);
+ phy_disconnect(priv->phy);
+ priv->phy = NULL;
+
+ cpmac_hw_stop(dev);
+
+ for (i = 0; i < 8; i++)
+ cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0);
+ cpmac_write(priv->regs, CPMAC_RX_PTR(0), 0);
+ cpmac_write(priv->regs, CPMAC_MBP, 0);
+
+ free_irq(dev->irq, dev);
+ iounmap(priv->regs);
+ mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs");
+ release_mem_region(mem->start, mem->end - mem->start);
+ priv->rx_head = &priv->desc_ring[CPMAC_QUEUES];
+ for (i = 0; i < priv->ring_size; i++) {
+ if (priv->rx_head[i].skb) {
+ dma_unmap_single(&dev->dev,
+ priv->rx_head[i].data_mapping,
+ CPMAC_SKB_SIZE,
+ DMA_FROM_DEVICE);
+ kfree_skb(priv->rx_head[i].skb);
+ }
+ }
+
+ dma_free_coherent(&dev->dev, sizeof(struct cpmac_desc) *
+ (CPMAC_QUEUES + priv->ring_size),
+ priv->desc_ring, priv->dma_ring);
+ return 0;
+}
+
+static int external_switch;
+
+static int __devinit cpmac_probe(struct platform_device *pdev)
+{
+ int rc, phy_id;
+ struct resource *mem;
+ struct cpmac_priv *priv;
+ struct net_device *dev;
+ struct plat_cpmac_data *pdata;
+
+ pdata = pdev->dev.platform_data;
+
+ for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) {
+ if (!(pdata->phy_mask & (1 << phy_id)))
+ continue;
+ if (!cpmac_mii.phy_map[phy_id])
+ continue;
+ break;
+ }
+
+ if (phy_id == PHY_MAX_ADDR) {
+ if (external_switch || dumb_switch)
+ phy_id = 0;
+ else {
+ printk(KERN_ERR "cpmac: no PHY present\n");
+ return -ENODEV;
+ }
+ }
+
+ dev = alloc_etherdev_mq(sizeof(*priv), CPMAC_QUEUES);
+
+ if (!dev) {
+ printk(KERN_ERR "cpmac: Unable to allocate net_device\n");
+ return -ENOMEM;
+ }
+
+ platform_set_drvdata(pdev, dev);
+ priv = netdev_priv(dev);
+
+ priv->pdev = pdev;
+ mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+ if (!mem) {
+ rc = -ENODEV;
+ goto fail;
+ }
+
+ dev->irq = platform_get_irq_byname(pdev, "irq");
+
+ dev->open = cpmac_open;
+ dev->stop = cpmac_stop;
+ dev->set_config = cpmac_config;
+ dev->hard_start_xmit = cpmac_start_xmit;
+ dev->do_ioctl = cpmac_ioctl;
+ dev->set_multicast_list = cpmac_set_multicast_list;
+ dev->tx_timeout = cpmac_tx_timeout;
+ dev->ethtool_ops = &cpmac_ethtool_ops;
+ dev->poll = cpmac_poll;
+ dev->weight = 64;
+ dev->features |= NETIF_F_MULTI_QUEUE;
+
+ spin_lock_init(&priv->lock);
+ spin_lock_init(&priv->rx_lock);
+ priv->dev = dev;
+ priv->ring_size = 64;
+ priv->msg_enable = netif_msg_init(debug_level, 0xff);
+ memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr));
+ if (phy_id == 31) {
+ snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT,
+ cpmac_mii.id, phy_id);
+ } else
+ snprintf(priv->phy_name, BUS_ID_SIZE, "fixed@%d:%d", 100, 1);
+
+ if ((rc = register_netdev(dev))) {
+ printk(KERN_ERR "cpmac: error %i registering device %s\n", rc,
+ dev->name);
+ goto fail;
+ }
+
+ if (netif_msg_probe(priv)) {
+ printk(KERN_INFO
+ "cpmac: device %s (regs: %p, irq: %d, phy: %s, mac: "
+ MAC_FMT ")\n", dev->name, (void *)mem->start, dev->irq,
+ priv->phy_name, MAC_ARG(dev->dev_addr));
+ }
+ return 0;
+
+fail:
+ free_netdev(dev);
+ return rc;
+}
+
+static int __devexit cpmac_remove(struct platform_device *pdev)
+{
+ struct net_device *dev = platform_get_drvdata(pdev);
+ unregister_netdev(dev);
+ free_netdev(dev);
+ return 0;
+}
+
+static struct platform_driver cpmac_driver = {
+ .driver.name = "cpmac",
+ .probe = cpmac_probe,
+ .remove = __devexit_p(cpmac_remove),
+};
+
+int __devinit cpmac_init(void)
+{
+ u32 mask;
+ int i, res;
+
+ cpmac_mii.priv = ioremap(AR7_REGS_MDIO, 256);
+
+ if (!cpmac_mii.priv) {
+ printk(KERN_ERR "Can't ioremap mdio registers\n");
+ return -ENXIO;
+ }
+
+#warning FIXME: unhardcode gpio&reset bits
+ ar7_gpio_disable(26);
+ ar7_gpio_disable(27);
+ ar7_device_reset(AR7_RESET_BIT_CPMAC_LO);
+ ar7_device_reset(AR7_RESET_BIT_CPMAC_HI);
+ ar7_device_reset(AR7_RESET_BIT_EPHY);
+
+ cpmac_mii.reset(&cpmac_mii);
+
+ for (i = 0; i < 300000; i++)
+ if ((mask = cpmac_read(cpmac_mii.priv, CPMAC_MDIO_ALIVE)))
+ break;
+ else
+ cpu_relax();
+
+ mask &= 0x7fffffff;
+ if (mask & (mask - 1)) {
+ external_switch = 1;
+ mask = 0;
+ }
+
+ cpmac_mii.phy_mask = ~(mask | 0x80000000);
+
+ res = mdiobus_register(&cpmac_mii);
+ if (res)
+ goto fail_mii;
+
+ res = platform_driver_register(&cpmac_driver);
+ if (res)
+ goto fail_cpmac;
+
+ return 0;
+
+fail_cpmac:
+ mdiobus_unregister(&cpmac_mii);
+
+fail_mii:
+ iounmap(cpmac_mii.priv);
+
+ return res;
+}
+
+void __devexit cpmac_exit(void)
+{
+ platform_driver_unregister(&cpmac_driver);
+ mdiobus_unregister(&cpmac_mii);
+ iounmap(cpmac_mii.priv);
+}
+
+module_init(cpmac_init);
+module_exit(cpmac_exit);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 0db5e6f..558440c 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -168,7 +168,6 @@
struct gfar_private *priv = NULL;
struct gianfar_platform_data *einfo;
struct resource *r;
- int idx;
int err = 0;
DECLARE_MAC_BUF(mac);
@@ -261,7 +260,9 @@
dev->hard_start_xmit = gfar_start_xmit;
dev->tx_timeout = gfar_timeout;
dev->watchdog_timeo = TX_TIMEOUT;
+#ifdef CONFIG_GFAR_NAPI
netif_napi_add(dev, &priv->napi, gfar_poll, GFAR_DEV_WEIGHT);
+#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = gfar_netpoll;
#endif
@@ -931,9 +932,14 @@
/* Returns 0 for success. */
static int gfar_enet_open(struct net_device *dev)
{
+#ifdef CONFIG_GFAR_NAPI
+ struct gfar_private *priv = netdev_priv(dev);
+#endif
int err;
+#ifdef CONFIG_GFAR_NAPI
napi_enable(&priv->napi);
+#endif
/* Initialize a bunch of registers */
init_registers(dev);
@@ -943,13 +949,17 @@
err = init_phy(dev);
if(err) {
+#ifdef CONFIG_GFAR_NAPI
napi_disable(&priv->napi);
+#endif
return err;
}
err = startup_gfar(dev);
if (err)
+#ifdef CONFIG_GFAR_NAPI
napi_disable(&priv->napi);
+#endif
netif_start_queue(dev);
@@ -1103,7 +1113,9 @@
{
struct gfar_private *priv = netdev_priv(dev);
+#ifdef CONFIG_GFAR_NAPI
napi_disable(&priv->napi);
+#endif
stop_gfar(dev);
diff --git a/drivers/net/ibm_emac/ibm_emac_mal.c b/drivers/net/ibm_emac/ibm_emac_mal.c
index 4e49e8c..dcd8826 100644
--- a/drivers/net/ibm_emac/ibm_emac_mal.c
+++ b/drivers/net/ibm_emac/ibm_emac_mal.c
@@ -413,7 +413,10 @@
ocpdev->def->index);
return -ENOMEM;
}
- mal->dcrbase = maldata->dcr_base;
+
+ /* XXX This only works for native dcr for now */
+ mal->dcrhost = dcr_map(NULL, maldata->dcr_base, 0);
+
mal->def = ocpdev->def;
INIT_LIST_HEAD(&mal->poll_list);
diff --git a/drivers/net/ibm_emac/ibm_emac_mal.h b/drivers/net/ibm_emac/ibm_emac_mal.h
index 8f54d62..b8adbe6 100644
--- a/drivers/net/ibm_emac/ibm_emac_mal.h
+++ b/drivers/net/ibm_emac/ibm_emac_mal.h
@@ -191,7 +191,6 @@
};
struct ibm_ocp_mal {
- int dcrbase;
dcr_host_t dcrhost;
struct list_head poll_list;
@@ -209,12 +208,12 @@
static inline u32 get_mal_dcrn(struct ibm_ocp_mal *mal, int reg)
{
- return dcr_read(mal->dcrhost, mal->dcrbase + reg);
+ return dcr_read(mal->dcrhost, reg);
}
static inline void set_mal_dcrn(struct ibm_ocp_mal *mal, int reg, u32 val)
{
- dcr_write(mal->dcrhost, mal->dcrbase + reg, val);
+ dcr_write(mal->dcrhost, reg, val);
}
/* Register MAL devices */
diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c
index 5885411..39f4cb6 100644
--- a/drivers/net/ibm_newemac/mal.c
+++ b/drivers/net/ibm_newemac/mal.c
@@ -461,6 +461,7 @@
struct mal_instance *mal;
int err = 0, i, bd_size;
int index = mal_count++;
+ unsigned int dcr_base;
const u32 *prop;
u32 cfg;
@@ -497,14 +498,14 @@
}
mal->num_rx_chans = prop[0];
- mal->dcr_base = dcr_resource_start(ofdev->node, 0);
- if (mal->dcr_base == 0) {
+ dcr_base = dcr_resource_start(ofdev->node, 0);
+ if (dcr_base == 0) {
printk(KERN_ERR
"mal%d: can't find DCR resource!\n", index);
err = -ENODEV;
goto fail;
}
- mal->dcr_host = dcr_map(ofdev->node, mal->dcr_base, 0x100);
+ mal->dcr_host = dcr_map(ofdev->node, dcr_base, 0x100);
if (!DCR_MAP_OK(mal->dcr_host)) {
printk(KERN_ERR
"mal%d: failed to map DCRs !\n", index);
@@ -626,7 +627,7 @@
fail2:
dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma);
fail_unmap:
- dcr_unmap(mal->dcr_host, mal->dcr_base, 0x100);
+ dcr_unmap(mal->dcr_host, 0x100);
fail:
kfree(mal);
diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h
index cb1a16d..784edb8 100644
--- a/drivers/net/ibm_newemac/mal.h
+++ b/drivers/net/ibm_newemac/mal.h
@@ -185,7 +185,6 @@
struct mal_instance {
int version;
- int dcr_base;
dcr_host_t dcr_host;
int num_tx_chans; /* Number of TX channels */
@@ -213,12 +212,12 @@
static inline u32 get_mal_dcrn(struct mal_instance *mal, int reg)
{
- return dcr_read(mal->dcr_host, mal->dcr_base + reg);
+ return dcr_read(mal->dcr_host, reg);
}
static inline void set_mal_dcrn(struct mal_instance *mal, int reg, u32 val)
{
- dcr_write(mal->dcr_host, mal->dcr_base + reg, val);
+ dcr_write(mal->dcr_host, reg, val);
}
/* Register MAL devices */
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 3e5eca1..a82d8f9 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -840,7 +840,7 @@
/* test 1: SIR filter and back to back */
- for (j = 0; j < (sizeof (bauds) / sizeof (int)); ++j)
+ for (j = 0; j < ARRAY_SIZE(bauds); ++j)
{
int fir = (j > 1);
toshoboe_stopchip (self);
diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c
index d3825c8..5c154fe 100644
--- a/drivers/net/jazzsonic.c
+++ b/drivers/net/jazzsonic.c
@@ -208,7 +208,6 @@
struct sonic_local *lp;
struct resource *res;
int err = 0;
- int i;
DECLARE_MAC_BUF(mac);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index be25aa3..662b8d1 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -265,17 +265,16 @@
if (err)
goto out_free_netdev;
- err = 0;
net->loopback_dev = dev;
+ return 0;
-out:
- if (err)
- panic("loopback: Failed to register netdevice: %d\n", err);
- return err;
out_free_netdev:
free_netdev(dev);
- goto out;
+out:
+ if (net == &init_net)
+ panic("loopback: Failed to register netdevice: %d\n", err);
+ return err;
}
static __net_exit void loopback_net_exit(struct net *net)
diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c
index d593175..37707a0 100644
--- a/drivers/net/mipsnet.c
+++ b/drivers/net/mipsnet.c
@@ -7,12 +7,12 @@
#define DEBUG
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/platform_device.h>
-#include <asm/io.h>
#include <asm/mips-boards/simint.h>
#include "mipsnet.h" /* actual device IO mapping */
@@ -33,9 +33,8 @@
if (available_len < len)
return -EFAULT;
- for (; len > 0; len--, kdata++) {
+ for (; len > 0; len--, kdata++)
*kdata = inb(mipsnet_reg_address(dev, rxDataBuffer));
- }
return inl(mipsnet_reg_address(dev, rxDataCount));
}
@@ -47,16 +46,15 @@
char *buf_ptr = skb->data;
pr_debug("%s: %s(): telling MIPSNET txDataCount(%d)\n",
- dev->name, __FUNCTION__, skb->len);
+ dev->name, __FUNCTION__, skb->len);
outl(skb->len, mipsnet_reg_address(dev, txDataCount));
pr_debug("%s: %s(): sending data to MIPSNET txDataBuffer(%d)\n",
- dev->name, __FUNCTION__, skb->len);
+ dev->name, __FUNCTION__, skb->len);
- for (; count_to_go; buf_ptr++, count_to_go--) {
+ for (; count_to_go; buf_ptr++, count_to_go--)
outb(*buf_ptr, mipsnet_reg_address(dev, txDataBuffer));
- }
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
@@ -67,7 +65,7 @@
static int mipsnet_xmit(struct sk_buff *skb, struct net_device *dev)
{
pr_debug("%s:%s(): transmitting %d bytes\n",
- dev->name, __FUNCTION__, skb->len);
+ dev->name, __FUNCTION__, skb->len);
/* Only one packet at a time. Once TXDONE interrupt is serviced, the
* queue will be restarted.
@@ -83,7 +81,8 @@
struct sk_buff *skb;
size_t len = count;
- if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) {
+ skb = alloc_skb(len + 2, GFP_KERNEL);
+ if (!skb) {
dev->stats.rx_dropped++;
return -ENOMEM;
}
@@ -96,7 +95,7 @@
skb->ip_summed = CHECKSUM_UNNECESSARY;
pr_debug("%s:%s(): pushing RXed data to kernel\n",
- dev->name, __FUNCTION__);
+ dev->name, __FUNCTION__);
netif_rx(skb);
dev->stats.rx_packets++;
@@ -114,42 +113,44 @@
if (irq == dev->irq) {
pr_debug("%s:%s(): irq %d for device\n",
- dev->name, __FUNCTION__, irq);
+ dev->name, __FUNCTION__, irq);
retval = IRQ_HANDLED;
interruptFlags =
inl(mipsnet_reg_address(dev, interruptControl));
pr_debug("%s:%s(): intCtl=0x%016llx\n", dev->name,
- __FUNCTION__, interruptFlags);
+ __FUNCTION__, interruptFlags);
if (interruptFlags & MIPSNET_INTCTL_TXDONE) {
pr_debug("%s:%s(): got TXDone\n",
- dev->name, __FUNCTION__);
+ dev->name, __FUNCTION__);
outl(MIPSNET_INTCTL_TXDONE,
mipsnet_reg_address(dev, interruptControl));
- // only one packet at a time, we are done.
+ /* only one packet at a time, we are done. */
netif_wake_queue(dev);
} else if (interruptFlags & MIPSNET_INTCTL_RXDONE) {
pr_debug("%s:%s(): got RX data\n",
- dev->name, __FUNCTION__);
+ dev->name, __FUNCTION__);
mipsnet_get_fromdev(dev,
- inl(mipsnet_reg_address(dev, rxDataCount)));
+ inl(mipsnet_reg_address(dev, rxDataCount)));
pr_debug("%s:%s(): clearing RX int\n",
- dev->name, __FUNCTION__);
+ dev->name, __FUNCTION__);
outl(MIPSNET_INTCTL_RXDONE,
mipsnet_reg_address(dev, interruptControl));
} else if (interruptFlags & MIPSNET_INTCTL_TESTBIT) {
pr_debug("%s:%s(): got test interrupt\n",
- dev->name, __FUNCTION__);
- // TESTBIT is cleared on read.
- // And takes effect after a write with 0
+ dev->name, __FUNCTION__);
+ /*
+ * TESTBIT is cleared on read.
+ * And takes effect after a write with 0
+ */
outl(0, mipsnet_reg_address(dev, interruptControl));
} else {
pr_debug("%s:%s(): no valid fags 0x%016llx\n",
- dev->name, __FUNCTION__, interruptFlags);
- // Maybe shared IRQ, just ignore, no clearing.
+ dev->name, __FUNCTION__, interruptFlags);
+ /* Maybe shared IRQ, just ignore, no clearing. */
retval = IRQ_NONE;
}
@@ -159,7 +160,7 @@
retval = IRQ_NONE;
}
return retval;
-} //mipsnet_interrupt()
+}
static int mipsnet_open(struct net_device *dev)
{
@@ -171,18 +172,18 @@
if (err) {
pr_debug("%s: %s(): can't get irq %d\n",
- dev->name, __FUNCTION__, dev->irq);
+ dev->name, __FUNCTION__, dev->irq);
release_region(dev->base_addr, MIPSNET_IO_EXTENT);
return err;
}
pr_debug("%s: %s(): got IO region at 0x%04lx and irq %d for dev.\n",
- dev->name, __FUNCTION__, dev->base_addr, dev->irq);
+ dev->name, __FUNCTION__, dev->base_addr, dev->irq);
netif_start_queue(dev);
- // test interrupt handler
+ /* test interrupt handler */
outl(MIPSNET_INTCTL_TESTBIT,
mipsnet_reg_address(dev, interruptControl));
@@ -199,8 +200,6 @@
static void mipsnet_set_mclist(struct net_device *dev)
{
- // we don't do anything
- return;
}
static int __init mipsnet_probe(struct device *dev)
@@ -226,13 +225,13 @@
*/
netdev->base_addr = 0x4200;
netdev->irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB0 +
- inl(mipsnet_reg_address(netdev, interruptInfo));
+ inl(mipsnet_reg_address(netdev, interruptInfo));
- // Get the io region now, get irq on open()
+ /* Get the io region now, get irq on open() */
if (!request_region(netdev->base_addr, MIPSNET_IO_EXTENT, "mipsnet")) {
pr_debug("%s: %s(): IO region {start: 0x%04lux, len: %d} "
- "for dev is not availble.\n", netdev->name,
- __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT);
+ "for dev is not availble.\n", netdev->name,
+ __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT);
err = -EBUSY;
goto out_free_netdev;
}
diff --git a/drivers/net/mipsnet.h b/drivers/net/mipsnet.h
index 026c732..0132c67 100644
--- a/drivers/net/mipsnet.h
+++ b/drivers/net/mipsnet.h
@@ -9,32 +9,34 @@
/*
* Id of this Net device, as seen by the core.
*/
-#define MIPS_NET_DEV_ID ((uint64_t) \
- ((uint64_t)'M'<< 0)| \
- ((uint64_t)'I'<< 8)| \
- ((uint64_t)'P'<<16)| \
- ((uint64_t)'S'<<24)| \
- ((uint64_t)'N'<<32)| \
- ((uint64_t)'E'<<40)| \
- ((uint64_t)'T'<<48)| \
- ((uint64_t)'0'<<56))
+#define MIPS_NET_DEV_ID ((uint64_t) \
+ ((uint64_t) 'M' << 0)| \
+ ((uint64_t) 'I' << 8)| \
+ ((uint64_t) 'P' << 16)| \
+ ((uint64_t) 'S' << 24)| \
+ ((uint64_t) 'N' << 32)| \
+ ((uint64_t) 'E' << 40)| \
+ ((uint64_t) 'T' << 48)| \
+ ((uint64_t) '0' << 56))
/*
* Net status/control block as seen by sw in the core.
* (Why not use bit fields? can't be bothered with cross-platform struct
* packing.)
*/
-typedef struct _net_control_block {
- /// dev info for probing
- /// reads as MIPSNET%d where %d is some form of version
- uint64_t devId; /*0x00 */
+struct net_control_block {
+ /*
+ * dev info for probing
+ * reads as MIPSNET%d where %d is some form of version
+ */
+ uint64_t devId; /* 0x00 */
/*
* read only busy flag.
* Set and cleared by the Net Device to indicate that an rx or a tx
* is in progress.
*/
- uint32_t busy; /*0x08 */
+ uint32_t busy; /* 0x08 */
/*
* Set by the Net Device.
@@ -43,16 +45,16 @@
* rxDataBuffer. The value will decrease till 0 until all the data
* from rxDataBuffer has been read.
*/
- uint32_t rxDataCount; /*0x0c */
+ uint32_t rxDataCount; /* 0x0c */
#define MIPSNET_MAX_RXTX_DATACOUNT (1<<16)
/*
- * Settable from the MIPS core, cleared by the Net Device.
- * The core should set the number of bytes it wants to send,
- * then it should write those bytes of data to txDataBuffer.
- * The device will clear txDataCount has been processed (not necessarily sent).
+ * Settable from the MIPS core, cleared by the Net Device. The core
+ * should set the number of bytes it wants to send, then it should
+ * write those bytes of data to txDataBuffer. The device will clear
+ * txDataCount has been processed (not necessarily sent).
*/
- uint32_t txDataCount; /*0x10 */
+ uint32_t txDataCount; /* 0x10 */
/*
* Interrupt control
@@ -69,39 +71,42 @@
* To clear the test interrupt, write 0 to this register.
*/
uint32_t interruptControl; /*0x14 */
-#define MIPSNET_INTCTL_TXDONE ((uint32_t)(1<< 0))
-#define MIPSNET_INTCTL_RXDONE ((uint32_t)(1<< 1))
-#define MIPSNET_INTCTL_TESTBIT ((uint32_t)(1<<31))
-#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE|MIPSNET_INTCTL_RXDONE|MIPSNET_INTCTL_TESTBIT)
+#define MIPSNET_INTCTL_TXDONE ((uint32_t)(1 << 0))
+#define MIPSNET_INTCTL_RXDONE ((uint32_t)(1 << 1))
+#define MIPSNET_INTCTL_TESTBIT ((uint32_t)(1 << 31))
+#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE | \
+ MIPSNET_INTCTL_RXDONE | \
+ MIPSNET_INTCTL_TESTBIT)
/*
- * Readonly core-specific interrupt info for the device to signal the core.
- * The meaning of the contents of this field might change.
+ * Readonly core-specific interrupt info for the device to signal the
+ * core. The meaning of the contents of this field might change.
+ *
+ * TODO: the whole memIntf interrupt scheme is messy: the device should
+ * have no control what so ever of what VPE/register set is being
+ * used. The MemIntf should only expose interrupt lines, and
+ * something in the config should be responsible for the
+ * line<->core/vpe bindings.
*/
- /*###\todo: the whole memIntf interrupt scheme is messy: the device should have
- * no control what so ever of what VPE/register set is being used.
- * The MemIntf should only expose interrupt lines, and something in the
- * config should be responsible for the line<->core/vpe bindings.
- */
- uint32_t interruptInfo; /*0x18 */
+ uint32_t interruptInfo; /* 0x18 */
/*
* This is where the received data is read out.
* There is more data to read until rxDataReady is 0.
* Only 1 byte at this regs offset is used.
*/
- uint32_t rxDataBuffer; /*0x1c */
+ uint32_t rxDataBuffer; /* 0x1c */
/*
- * This is where the data to transmit is written.
- * Data should be written for the amount specified in the txDataCount register.
- * Only 1 byte at this regs offset is used.
+ * This is where the data to transmit is written. Data should be
+ * written for the amount specified in the txDataCount register. Only
+ * 1 byte at this regs offset is used.
*/
- uint32_t txDataBuffer; /*0x20 */
-} MIPS_T_NetControl;
+ uint32_t txDataBuffer; /* 0x20 */
+};
#define MIPSNET_IO_EXTENT 0x40 /* being generous */
-#define field_offset(field) ((int)&((MIPS_T_NetControl*)(0))->field)
+#define field_offset(field) (offsetof(struct net_control_block, field))
#endif /* __MIPSNET_H */
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e8afa10..64c8151 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -75,7 +75,7 @@
#include "myri10ge_mcp.h"
#include "myri10ge_mcp_gen_header.h"
-#define MYRI10GE_VERSION_STR "1.3.2-1.269"
+#define MYRI10GE_VERSION_STR "1.3.2-1.287"
MODULE_DESCRIPTION("Myricom 10G driver (10GbE)");
MODULE_AUTHOR("Maintainer: help@myri.com");
@@ -214,6 +214,8 @@
unsigned long serial_number;
int vendor_specific_offset;
int fw_multicast_support;
+ unsigned long features;
+ u32 max_tso6;
u32 read_dma;
u32 write_dma;
u32 read_write_dma;
@@ -311,6 +313,7 @@
#define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8)
static void myri10ge_set_multicast_list(struct net_device *dev);
+static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev);
static inline void put_be32(__be32 val, __be32 __iomem * p)
{
@@ -612,6 +615,7 @@
__be32 buf[16];
u32 dma_low, dma_high, size;
int status, i;
+ struct myri10ge_cmd cmd;
size = 0;
status = myri10ge_load_hotplug_firmware(mgp, &size);
@@ -688,6 +692,14 @@
dev_info(&mgp->pdev->dev, "handoff confirmed\n");
myri10ge_dummy_rdma(mgp, 1);
+ /* probe for IPv6 TSO support */
+ mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
+ status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
+ &cmd, 0);
+ if (status == 0) {
+ mgp->max_tso6 = cmd.data0;
+ mgp->features |= NETIF_F_TSO6;
+ }
return 0;
}
@@ -1047,7 +1059,8 @@
hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN;
- /* allocate an skb to attach the page(s) to. */
+ /* allocate an skb to attach the page(s) to. This is done
+ * after trying LRO, so as to avoid skb allocation overheads */
skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16);
if (unlikely(skb == NULL)) {
@@ -1217,7 +1230,8 @@
static int myri10ge_poll(struct napi_struct *napi, int budget)
{
- struct myri10ge_priv *mgp = container_of(napi, struct myri10ge_priv, napi);
+ struct myri10ge_priv *mgp =
+ container_of(napi, struct myri10ge_priv, napi);
struct net_device *netdev = mgp->dev;
struct myri10ge_rx_done *rx_done = &mgp->rx_done;
int work_done;
@@ -1382,6 +1396,18 @@
return 0;
}
+static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled)
+{
+ struct myri10ge_priv *mgp = netdev_priv(netdev);
+ unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO);
+
+ if (tso_enabled)
+ netdev->features |= flags;
+ else
+ netdev->features &= ~flags;
+ return 0;
+}
+
static const char myri10ge_gstrings_stats[][ETH_GSTRING_LEN] = {
"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
@@ -1506,7 +1532,7 @@
.set_rx_csum = myri10ge_set_rx_csum,
.set_tx_csum = ethtool_op_set_tx_hw_csum,
.set_sg = ethtool_op_set_sg,
- .set_tso = ethtool_op_set_tso,
+ .set_tso = myri10ge_set_tso,
.get_link = ethtool_op_get_link,
.get_strings = myri10ge_get_strings,
.get_sset_count = myri10ge_get_sset_count,
@@ -2164,7 +2190,8 @@
pseudo_hdr_offset = cksum_offset + skb->csum_offset;
/* If the headers are excessively large, then we must
* fall back to a software checksum */
- if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) {
+ if (unlikely(!mss && (cksum_offset > 255 ||
+ pseudo_hdr_offset > 127))) {
if (skb_checksum_help(skb))
goto drop;
cksum_offset = 0;
@@ -2184,9 +2211,18 @@
/* negative cum_len signifies to the
* send loop that we are still in the
* header portion of the TSO packet.
- * TSO header must be at most 134 bytes long */
+ * TSO header can be at most 1KB long */
cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
+ /* for IPv6 TSO, the checksum offset stores the
+ * TCP header length, to save the firmware from
+ * the need to parse the headers */
+ if (skb_is_gso_v6(skb)) {
+ cksum_offset = tcp_hdrlen(skb);
+ /* Can only handle headers <= max_tso6 long */
+ if (unlikely(-cum_len > mgp->max_tso6))
+ return myri10ge_sw_tso(skb, dev);
+ }
/* for TSO, pseudo_hdr_offset holds mss.
* The firmware figures out where to put
* the checksum by parsing the header. */
@@ -2301,10 +2337,12 @@
req++;
count++;
rdma_count++;
- if (unlikely(cksum_offset > seglen))
- cksum_offset -= seglen;
- else
- cksum_offset = 0;
+ if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) {
+ if (unlikely(cksum_offset > seglen))
+ cksum_offset -= seglen;
+ else
+ cksum_offset = 0;
+ }
}
if (frag_idx == frag_cnt)
break;
@@ -2387,6 +2425,41 @@
}
+static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev)
+{
+ struct sk_buff *segs, *curr;
+ struct myri10ge_priv *mgp = dev->priv;
+ int status;
+
+ segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6);
+ if (unlikely(IS_ERR(segs)))
+ goto drop;
+
+ while (segs) {
+ curr = segs;
+ segs = segs->next;
+ curr->next = NULL;
+ status = myri10ge_xmit(curr, dev);
+ if (status != 0) {
+ dev_kfree_skb_any(curr);
+ if (segs != NULL) {
+ curr = segs;
+ segs = segs->next;
+ curr->next = NULL;
+ dev_kfree_skb_any(segs);
+ }
+ goto drop;
+ }
+ }
+ dev_kfree_skb_any(skb);
+ return 0;
+
+drop:
+ dev_kfree_skb_any(skb);
+ mgp->stats.tx_dropped += 1;
+ return 0;
+}
+
static struct net_device_stats *myri10ge_get_stats(struct net_device *dev)
{
struct myri10ge_priv *mgp = netdev_priv(dev);
@@ -2706,7 +2779,6 @@
}
#ifdef CONFIG_PM
-
static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state)
{
struct myri10ge_priv *mgp;
@@ -2787,7 +2859,6 @@
return -EIO;
}
-
#endif /* CONFIG_PM */
static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp)
@@ -2954,8 +3025,7 @@
mgp = netdev_priv(netdev);
mgp->dev = netdev;
- netif_napi_add(netdev, &mgp->napi,
- myri10ge_poll, myri10ge_napi_weight);
+ netif_napi_add(netdev, &mgp->napi, myri10ge_poll, myri10ge_napi_weight);
mgp->pdev = pdev;
mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
mgp->pause = myri10ge_flow_control;
@@ -3077,7 +3147,7 @@
netdev->change_mtu = myri10ge_change_mtu;
netdev->set_multicast_list = myri10ge_set_multicast_list;
netdev->set_mac_address = myri10ge_set_mac_address;
- netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
+ netdev->features = mgp->features;
if (dac_enabled)
netdev->features |= NETIF_F_HIGHDMA;
diff --git a/drivers/net/myri10ge/myri10ge_mcp.h b/drivers/net/myri10ge/myri10ge_mcp.h
index a1d2a22..58e5717 100644
--- a/drivers/net/myri10ge/myri10ge_mcp.h
+++ b/drivers/net/myri10ge/myri10ge_mcp.h
@@ -10,7 +10,7 @@
__be32 low;
};
-/* 4 Bytes */
+/* 4 Bytes. 8 Bytes for NDIS drivers. */
struct mcp_slot {
__sum16 checksum;
__be16 length;
@@ -205,8 +205,87 @@
/* same than DMA_TEST (same args) but abort with UNALIGNED on unaligned
* chipset */
- MXGEFW_CMD_UNALIGNED_STATUS
- /* return data = boolean, true if the chipset is known to be unaligned */
+ MXGEFW_CMD_UNALIGNED_STATUS,
+ /* return data = boolean, true if the chipset is known to be unaligned */
+
+ MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
+ /* data0 = number of big buffers to use. It must be 0 or a power of 2.
+ * 0 indicates that the NIC consumes as many buffers as they are required
+ * for packet. This is the default behavior.
+ * A power of 2 number indicates that the NIC always uses the specified
+ * number of buffers for each big receive packet.
+ * It is up to the driver to ensure that this value is big enough for
+ * the NIC to be able to receive maximum-sized packets.
+ */
+
+ MXGEFW_CMD_GET_MAX_RSS_QUEUES,
+ MXGEFW_CMD_ENABLE_RSS_QUEUES,
+ /* data0 = number of slices n (0, 1, ..., n-1) to enable
+ * data1 = interrupt mode. 0=share one INTx/MSI, 1=use one MSI-X per queue.
+ * If all queues share one interrupt, the driver must have set
+ * RSS_SHARED_INTERRUPT_DMA before enabling queues.
+ */
+ MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET,
+ MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA,
+ /* data0, data1 = bus address lsw, msw */
+ MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
+ /* get the offset of the indirection table */
+ MXGEFW_CMD_SET_RSS_TABLE_SIZE,
+ /* set the size of the indirection table */
+ MXGEFW_CMD_GET_RSS_KEY_OFFSET,
+ /* get the offset of the secret key */
+ MXGEFW_CMD_RSS_KEY_UPDATED,
+ /* tell nic that the secret key's been updated */
+ MXGEFW_CMD_SET_RSS_ENABLE,
+ /* data0 = enable/disable rss
+ * 0: disable rss. nic does not distribute receive packets.
+ * 1: enable rss. nic distributes receive packets among queues.
+ * data1 = hash type
+ * 1: IPV4
+ * 2: TCP_IPV4
+ * 3: IPV4 | TCP_IPV4
+ */
+
+ MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
+ /* Return data = the max. size of the entire headers of a IPv6 TSO packet.
+ * If the header size of a IPv6 TSO packet is larger than the specified
+ * value, then the driver must not use TSO.
+ * This size restriction only applies to IPv6 TSO.
+ * For IPv4 TSO, the maximum size of the headers is fixed, and the NIC
+ * always has enough header buffer to store maximum-sized headers.
+ */
+
+ MXGEFW_CMD_SET_TSO_MODE,
+ /* data0 = TSO mode.
+ * 0: Linux/FreeBSD style (NIC default)
+ * 1: NDIS/NetBSD style
+ */
+
+ MXGEFW_CMD_MDIO_READ,
+ /* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */
+ MXGEFW_CMD_MDIO_WRITE,
+ /* data0 = dev_addr, data1 = register/addr, data2 = value */
+
+ MXGEFW_CMD_XFP_I2C_READ,
+ /* Starts to get a fresh copy of one byte or of the whole xfp i2c table, the
+ * obtained data is cached inside the xaui-xfi chip :
+ * data0 : "all" flag : 0 => get one byte, 1=> get 256 bytes,
+ * data1 : if (data0 == 0): index of byte to refresh [ not used otherwise ]
+ * The operation might take ~1ms for a single byte or ~65ms when refreshing all 256 bytes
+ * During the i2c operation, MXGEFW_CMD_XFP_I2C_READ or MXGEFW_CMD_XFP_BYTE attempts
+ * will return MXGEFW_CMD_ERROR_BUSY
+ */
+ MXGEFW_CMD_XFP_BYTE,
+ /* Return the last obtained copy of a given byte in the xfp i2c table
+ * (copy cached during the last relevant MXGEFW_CMD_XFP_I2C_READ)
+ * data0 : index of the desired table entry
+ * Return data = the byte stored at the requested index in the table
+ */
+
+ MXGEFW_CMD_GET_VPUMP_OFFSET,
+ /* Return data = NIC memory offset of mcp_vpump_public_global */
+ MXGEFW_CMD_RESET_VPUMP,
+ /* Resets the VPUMP state */
};
enum myri10ge_mcp_cmd_status {
@@ -220,7 +299,10 @@
MXGEFW_CMD_ERROR_BAD_PORT,
MXGEFW_CMD_ERROR_RESOURCES,
MXGEFW_CMD_ERROR_MULTICAST,
- MXGEFW_CMD_ERROR_UNALIGNED
+ MXGEFW_CMD_ERROR_UNALIGNED,
+ MXGEFW_CMD_ERROR_NO_MDIO,
+ MXGEFW_CMD_ERROR_XFP_FAILURE,
+ MXGEFW_CMD_ERROR_XFP_ABSENT
};
#define MXGEFW_OLD_IRQ_DATA_LEN 40
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 527f9dc..50e1ec6 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -1576,7 +1576,7 @@
/* Set the timer to check for link beat. */
init_timer(&np->timer);
- np->timer.expires = jiffies + NATSEMI_TIMER_FREQ;
+ np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ);
np->timer.data = (unsigned long)dev;
np->timer.function = &netdev_timer; /* timer handler */
add_timer(&np->timer);
@@ -1856,7 +1856,11 @@
next_tick = 1;
}
}
- mod_timer(&np->timer, jiffies + next_tick);
+
+ if (next_tick > 1)
+ mod_timer(&np->timer, round_jiffies(jiffies + next_tick));
+ else
+ mod_timer(&np->timer, jiffies + next_tick);
}
static void dump_ring(struct net_device *dev)
@@ -3310,13 +3314,19 @@
{
struct net_device *dev = pci_get_drvdata (pdev);
struct netdev_private *np = netdev_priv(dev);
+ int ret = 0;
rtnl_lock();
if (netif_device_present(dev))
goto out;
if (netif_running(dev)) {
BUG_ON(!np->hands_off);
- pci_enable_device(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "pci_enable_device() failed: %d\n", ret);
+ goto out;
+ }
/* pci_power_on(pdev); */
napi_enable(&np->napi);
@@ -3331,12 +3341,12 @@
spin_unlock_irq(&np->lock);
enable_irq(dev->irq);
- mod_timer(&np->timer, jiffies + 1*HZ);
+ mod_timer(&np->timer, round_jiffies(jiffies + 1*HZ));
}
netif_device_attach(dev);
out:
rtnl_unlock();
- return 0;
+ return ret;
}
#endif /* CONFIG_PM */
diff --git a/drivers/net/ne-h8300.c b/drivers/net/ne-h8300.c
index 368f256..fbc7531 100644
--- a/drivers/net/ne-h8300.c
+++ b/drivers/net/ne-h8300.c
@@ -93,7 +93,7 @@
bus_width = *(volatile unsigned char *)ABWCR;
bus_width &= 1 << ((base_addr >> 21) & 7);
- for (i = 0; i < sizeof(reg_offset) / sizeof(u32); i++)
+ for (i = 0; i < ARRAY_SIZE(reg_offset); i++)
if (bus_width == 0)
reg_offset[i] = i * 2 + 1;
else
@@ -115,7 +115,7 @@
static inline int init_dev(struct net_device *dev)
{
- if (h8300_ne_count < (sizeof(h8300_ne_base) / sizeof(unsigned long))) {
+ if (h8300_ne_count < ARRAY_SIZE(h8300_ne_base)) {
dev->base_addr = h8300_ne_base[h8300_ne_count];
dev->irq = h8300_ne_irq[h8300_ne_count];
h8300_ne_count++;
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 43bfe7e..ed1f9bb 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6123,19 +6123,19 @@
val = nr64(ESPC_PHY_TYPE);
switch (np->port) {
case 0:
- val = (val & ESPC_PHY_TYPE_PORT0) >>
+ val8 = (val & ESPC_PHY_TYPE_PORT0) >>
ESPC_PHY_TYPE_PORT0_SHIFT;
break;
case 1:
- val = (val & ESPC_PHY_TYPE_PORT1) >>
+ val8 = (val & ESPC_PHY_TYPE_PORT1) >>
ESPC_PHY_TYPE_PORT1_SHIFT;
break;
case 2:
- val = (val & ESPC_PHY_TYPE_PORT2) >>
+ val8 = (val & ESPC_PHY_TYPE_PORT2) >>
ESPC_PHY_TYPE_PORT2_SHIFT;
break;
case 3:
- val = (val & ESPC_PHY_TYPE_PORT3) >>
+ val8 = (val & ESPC_PHY_TYPE_PORT3) >>
ESPC_PHY_TYPE_PORT3_SHIFT;
break;
default:
@@ -6143,9 +6143,9 @@
np->port);
return -EINVAL;
}
- niudbg(PROBE, "SPROM: PHY type %llx\n", (unsigned long long) val);
+ niudbg(PROBE, "SPROM: PHY type %x\n", val8);
- switch (val) {
+ switch (val8) {
case ESPC_PHY_TYPE_1G_COPPER:
/* 1G copper, MII */
np->flags &= ~(NIU_FLAGS_FIBER |
@@ -6175,8 +6175,7 @@
break;
default:
- dev_err(np->device, PFX "Bogus SPROM phy type %llu\n",
- (unsigned long long) val);
+ dev_err(np->device, PFX "Bogus SPROM phy type %u\n", val8);
return -EINVAL;
}
@@ -6213,7 +6212,7 @@
val = nr64(ESPC_MOD_STR_LEN);
niudbg(PROBE, "SPROM: MOD_STR_LEN[%llu]\n",
(unsigned long long) val);
- if (val > 8 * 4)
+ if (val >= 8 * 4)
return -EINVAL;
for (i = 0; i < val; i += 4) {
@@ -6229,7 +6228,7 @@
val = nr64(ESPC_BD_MOD_STR_LEN);
niudbg(PROBE, "SPROM: BD_MOD_STR_LEN[%llu]\n",
(unsigned long long) val);
- if (val > 4 * 4)
+ if (val >= 4 * 4)
return -EINVAL;
for (i = 0; i < val; i += 4) {
diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c
index 14361e8..c65199d 100644
--- a/drivers/net/saa9730.c
+++ b/drivers/net/saa9730.c
@@ -97,13 +97,16 @@
&lp->evm_saa9730_regs->InterruptBlock1);
}
-static void __attribute_used__ show_saa9730_regs(struct lan_saa9730_private *lp)
+static void __used show_saa9730_regs(struct net_device *dev)
{
+ struct lan_saa9730_private *lp = netdev_priv(dev);
int i, j;
+
printk("TxmBufferA = %p\n", lp->TxmBuffer[0][0]);
printk("TxmBufferB = %p\n", lp->TxmBuffer[1][0]);
printk("RcvBufferA = %p\n", lp->RcvBuffer[0][0]);
printk("RcvBufferB = %p\n", lp->RcvBuffer[1][0]);
+
for (i = 0; i < LAN_SAA9730_BUFFERS; i++) {
for (j = 0; j < LAN_SAA9730_TXM_Q_SIZE; j++) {
printk("TxmBuffer[%d][%d] = %x\n", i, j,
@@ -146,11 +149,13 @@
readl(&lp->lan_saa9730_regs->RxCtl));
printk("lp->lan_saa9730_regs->RxStatus = %x\n",
readl(&lp->lan_saa9730_regs->RxStatus));
+
for (i = 0; i < LAN_SAA9730_CAM_DWORDS; i++) {
writel(i, &lp->lan_saa9730_regs->CamAddress);
printk("lp->lan_saa9730_regs->CamData = %x\n",
readl(&lp->lan_saa9730_regs->CamData));
}
+
printk("dev->stats.tx_packets = %lx\n", dev->stats.tx_packets);
printk("dev->stats.tx_errors = %lx\n", dev->stats.tx_errors);
printk("dev->stats.tx_aborted_errors = %lx\n",
@@ -855,7 +860,7 @@
/* Transmitter timeout, serious problems */
dev->stats.tx_errors++;
printk("%s: transmit timed out, reset\n", dev->name);
- /*show_saa9730_regs(lp); */
+ /*show_saa9730_regs(dev); */
lan_saa9730_restart(lp);
dev->trans_start = jiffies;
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index a679f43..8038f28 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1461,7 +1461,6 @@
}
return IRQ_NONE;
#else
- struct tc35815_local *lp = dev->priv;
int handled;
u32 status;
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 8d04654..4e1b84e 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -1906,7 +1906,7 @@
/************** pci *****************/
if ((err = pci_enable_device(pdev))) /* it trigers interrupt, dunno why. */
- RET(err); /* it's not a problem though */
+ goto err_pci; /* it's not a problem though */
if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) &&
!(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) {
@@ -2076,6 +2076,7 @@
pci_release_regions(pdev);
err_dma:
pci_disable_device(pdev);
+err_pci:
vfree(nic);
RET(err);
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 30b1cca..014dc2c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,8 +64,8 @@
#define DRV_MODULE_NAME "tg3"
#define PFX DRV_MODULE_NAME ": "
-#define DRV_MODULE_VERSION "3.83"
-#define DRV_MODULE_RELDATE "October 10, 2007"
+#define DRV_MODULE_VERSION "3.84"
+#define DRV_MODULE_RELDATE "October 12, 2007"
#define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0
@@ -5056,6 +5056,12 @@
pci_write_config_dword(tp->pdev, TG3PCI_COMMAND, tp->pci_cmd);
+ if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
+ pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE,
+ tp->pci_cacheline_sz);
+ pci_write_config_byte(tp->pdev, PCI_LATENCY_TIMER,
+ tp->pci_lat_timer);
+ }
/* Make sure PCI-X relaxed ordering bit is clear. */
if (tp->pcix_cap) {
u16 pcix_cmd;
@@ -9034,7 +9040,7 @@
int i;
u32 j;
- for (i = 0; i < sizeof(test_pattern)/sizeof(u32); i++) {
+ for (i = 0; i < ARRAY_SIZE(test_pattern); i++) {
for (j = 0; j < len; j += 4) {
u32 val;
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 9b9cd83..41f34bb 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -1041,7 +1041,7 @@
{DC21142, dc21142_infoleaf},
{DC21143, dc21143_infoleaf}
};
-#define INFOLEAF_SIZE (sizeof(infoleaf_array)/(sizeof(int)+sizeof(int *)))
+#define INFOLEAF_SIZE ARRAY_SIZE(infoleaf_array)
/*
** List the SROM info block functions
@@ -1056,7 +1056,7 @@
compact_infoblock
};
-#define COMPACT (sizeof(dc_infoblock)/sizeof(int *) - 1)
+#define COMPACT (ARRAY_SIZE(dc_infoblock) - 1)
/*
** Miscellaneous defines...
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index d00e7d4..bec413b 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -63,7 +63,7 @@
#define UGETH_MSG_DEFAULT (NETIF_MSG_IFUP << 1 ) - 1
void uec_set_ethtool_ops(struct net_device *netdev);
-
+
static DEFINE_SPINLOCK(ugeth_lock);
static struct {
@@ -3454,9 +3454,12 @@
u16 length, howmany = 0;
u32 bd_status;
u8 *bdBuffer;
+ struct net_device * dev;
ugeth_vdbg("%s: IN", __FUNCTION__);
+ dev = ugeth->dev;
+
/* collect received buffers */
bd = ugeth->rxBd[rxQ];
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index b39a541..05df0a3 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -1342,11 +1342,11 @@
if (flp->initialized)
return(-EINVAL);
- for(i=0;i < sizeof(valid_port) / sizeof (int) ; i++)
+ for(i=0; i < ARRAY_SIZE(valid_port); i++)
if (valid_port[i] == map->base_addr)
break;
- if (i == sizeof(valid_port) / sizeof(int))
+ if (i == ARRAY_SIZE(valid_port))
return(-EINVAL);
if (!request_region(map->base_addr, SDLA_IO_EXTENTS, dev->name)){
@@ -1487,12 +1487,12 @@
}
}
- for(i=0;i < sizeof(valid_mem) / sizeof (int) ; i++)
+ for(i=0; i < ARRAY_SIZE(valid_mem); i++)
if (valid_mem[i] == map->mem_start)
break;
err = -EINVAL;
- if (i == sizeof(valid_mem) / sizeof(int))
+ if (i == ARRAY_SIZE(valid_mem))
goto fail2;
if (flp->type == SDLA_S502A && (map->mem_start & 0xF000) >> 12 == 0x0E)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index f464b82..7fd505c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -74,22 +74,12 @@
struct napi_struct napi;
- struct xen_netif_tx_front_ring tx;
- struct xen_netif_rx_front_ring rx;
+ unsigned int evtchn;
+ struct xenbus_device *xbdev;
spinlock_t tx_lock;
- spinlock_t rx_lock;
-
- unsigned int evtchn;
-
- /* Receive-ring batched refills. */
-#define RX_MIN_TARGET 8
-#define RX_DFL_MIN_TARGET 64
-#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
- unsigned rx_min_target, rx_max_target, rx_target;
- struct sk_buff_head rx_batch;
-
- struct timer_list rx_refill_timer;
+ struct xen_netif_tx_front_ring tx;
+ int tx_ring_ref;
/*
* {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
@@ -108,14 +98,23 @@
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
unsigned tx_skb_freelist;
+ spinlock_t rx_lock ____cacheline_aligned_in_smp;
+ struct xen_netif_rx_front_ring rx;
+ int rx_ring_ref;
+
+ /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_DFL_MIN_TARGET 64
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+ unsigned rx_min_target, rx_max_target, rx_target;
+ struct sk_buff_head rx_batch;
+
+ struct timer_list rx_refill_timer;
+
struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
- struct xenbus_device *xbdev;
- int tx_ring_ref;
- int rx_ring_ref;
-
unsigned long rx_pfn_array[NET_RX_RING_SIZE];
struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
struct mmu_update rx_mmu[NET_RX_RING_SIZE];
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index e8010a7..3ac080e 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -5213,6 +5213,10 @@
#endif /* CONFIG_PCI */
TRACE2(("gdth_detect() %d controller detected\n", gdth_ctr_count));
+
+ if (list_empty(&gdth_instances))
+ return -ENODEV;
+
#ifdef GDTH_STATISTICS
TRACE2(("gdth_detect(): Initializing timer !\n"));
init_timer(&gdth_timer);
diff --git a/fs/Kconfig b/fs/Kconfig
index bb02b39..815d201 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1755,6 +1755,14 @@
config SUNRPC_GSS
tristate
+config SUNRPC_XPRT_RDMA
+ tristate "RDMA transport for sunrpc (EXPERIMENTAL)"
+ depends on SUNRPC && INFINIBAND && EXPERIMENTAL
+ default m
+ help
+ Adds a client RPC transport for supporting kernel NFS over RDMA
+ mounts, including Infiniband and iWARP. Experimental.
+
config SUNRPC_BIND34
bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/inode.c b/fs/inode.c
index 29f5068..f97de0a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,15 @@
return NULL;
}
+ spin_lock_init(&inode->i_lock);
+ lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+
+ mutex_init(&inode->i_mutex);
+ lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+
+ init_rwsem(&inode->i_alloc_sem);
+ lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
@@ -190,8 +199,6 @@
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
- mutex_init(&inode->i_mutex);
- init_rwsem(&inode->i_alloc_sem);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
rwlock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -199,7 +206,6 @@
spin_lock_init(&inode->i_data.private_lock);
INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
- spin_lock_init(&inode->i_lock);
i_size_ordered_init(inode);
#ifdef CONFIG_INOTIFY
INIT_LIST_HEAD(&inode->inotify_watches);
@@ -561,6 +567,18 @@
void unlock_new_inode(struct inode *inode)
{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct file_system_type *type = inode->i_sb->s_type;
+ /*
+ * ensure nobody is actually holding i_mutex
+ */
+ mutex_destroy(&inode->i_mutex);
+ mutex_init(&inode->i_mutex);
+ if (inode->i_mode & S_IFDIR)
+ lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+ else
+ lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
+#endif
/*
* This is special! We do not need the spinlock
* when clearing I_LOCK, because we're guaranteed
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 772b653..8df5bac 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -233,6 +233,8 @@
return ret;
}
+static struct lock_class_key jbd_handle_key;
+
/* Allocate a new handle. This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
@@ -243,6 +245,8 @@
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
+ lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);
+
return handle;
}
@@ -286,6 +290,9 @@
current->journal_info = NULL;
handle = ERR_PTR(err);
}
+
+ lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+
return handle;
}
@@ -1411,6 +1418,8 @@
spin_unlock(&journal->j_state_lock);
}
+ lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+
jbd_free_handle(handle);
return err;
}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3353ed8..908b23f 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -10,6 +10,7 @@
#include <linux/utsname.h>
#include <linux/kernel.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprtsock.h>
#include <linux/sunrpc/svc.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/sm_inter.h>
@@ -132,7 +133,7 @@
.sin_port = 0,
};
struct rpc_create_args args = {
- .protocol = IPPROTO_UDP,
+ .protocol = XPRT_TRANSPORT_UDP,
.address = (struct sockaddr *)&sin,
.addrsize = sizeof(sin),
.servername = "localhost",
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5316e30..633653b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -62,8 +62,9 @@
}
else
{
- printk(KERN_NOTICE
- "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN);
+ dprintk("lockd: bad cookie size %d (only cookies under "
+ "%d bytes are supported.)\n",
+ len, NLM_MAXCOOKIELEN);
return NULL;
}
return p;
@@ -84,8 +85,7 @@
unsigned int len;
if ((len = ntohl(*p++)) != NFS2_FHSIZE) {
- printk(KERN_NOTICE
- "lockd: bad fhandle size %d (should be %d)\n",
+ dprintk("lockd: bad fhandle size %d (should be %d)\n",
len, NFS2_FHSIZE);
return NULL;
}
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 846fc1d..43ff939 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -64,8 +64,9 @@
}
else
{
- printk(KERN_NOTICE
- "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN);
+ dprintk("lockd: bad cookie size %d (only cookies under "
+ "%d bytes are supported.)\n",
+ len, NLM_MAXCOOKIELEN);
return NULL;
}
return p;
@@ -86,8 +87,7 @@
memset(f->data, 0, sizeof(f->data));
f->size = ntohl(*p++);
if (f->size > NFS_MAXFHSIZE) {
- printk(KERN_NOTICE
- "lockd: bad fhandle size %d (should be <=%d)\n",
+ dprintk("lockd: bad fhandle size %d (should be <=%d)\n",
f->size, NFS_MAXFHSIZE);
return NULL;
}
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b55cb23..df0f41e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -16,4 +16,3 @@
nfs4namespace.o
nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
-nfs-objs := $(nfs-y)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a204484..a532ee1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -23,6 +23,8 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/metrics.h>
+#include <linux/sunrpc/xprtsock.h>
+#include <linux/sunrpc/xprtrdma.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
@@ -340,7 +342,8 @@
to->to_retries = 2;
switch (proto) {
- case IPPROTO_TCP:
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_RDMA:
if (!to->to_initval)
to->to_initval = 60 * HZ;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
@@ -349,7 +352,7 @@
to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
to->to_exponential = 0;
break;
- case IPPROTO_UDP:
+ case XPRT_TRANSPORT_UDP:
default:
if (!to->to_initval)
to->to_initval = 11 * HZ / 10;
@@ -501,9 +504,9 @@
/*
* Initialise an NFS2 or NFS3 client
*/
-static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+static int nfs_init_client(struct nfs_client *clp,
+ const struct nfs_parsed_mount_data *data)
{
- int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
int error;
if (clp->cl_cons_state == NFS_CS_READY) {
@@ -522,8 +525,8 @@
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
- error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
- RPC_AUTH_UNIX, 0);
+ error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
+ data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -538,7 +541,8 @@
/*
* Create a version 2 or 3 client
*/
-static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+static int nfs_init_server(struct nfs_server *server,
+ const struct nfs_parsed_mount_data *data)
{
struct nfs_client *clp;
int error, nfsvers = 2;
@@ -551,7 +555,8 @@
#endif
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+ clp = nfs_get_client(data->nfs_server.hostname,
+ &data->nfs_server.address, nfsvers);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
@@ -581,7 +586,7 @@
if (error < 0)
goto error;
- error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+ error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
if (error < 0)
goto error;
@@ -760,7 +765,7 @@
* Create a version 2 or 3 volume record
* - keyed on server and FSID
*/
-struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
struct nfs_server *server;
@@ -906,7 +911,7 @@
* Create a version 4 volume record
*/
static int nfs4_init_server(struct nfs_server *server,
- const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+ const struct nfs_parsed_mount_data *data)
{
int error;
@@ -926,7 +931,7 @@
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
- error = nfs_init_server_rpcclient(server, authflavour);
+ error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
/* Done */
dprintk("<-- nfs4_init_server() = %d\n", error);
@@ -937,12 +942,7 @@
* Create a version 4 volume record
* - keyed on server and FSID
*/
-struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
- const char *hostname,
- const struct sockaddr_in *addr,
- const char *mntpath,
- const char *ip_addr,
- rpc_authflavor_t authflavour,
+struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
struct nfs_fattr fattr;
@@ -956,13 +956,18 @@
return ERR_PTR(-ENOMEM);
/* Get a client record */
- error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour,
- data->proto, data->timeo, data->retrans);
+ error = nfs4_set_client(server,
+ data->nfs_server.hostname,
+ &data->nfs_server.address,
+ data->client_address,
+ data->auth_flavors[0],
+ data->nfs_server.protocol,
+ data->timeo, data->retrans);
if (error < 0)
goto error;
/* set up the general RPC client */
- error = nfs4_init_server(server, data, authflavour);
+ error = nfs4_init_server(server, data);
if (error < 0)
goto error;
@@ -971,7 +976,7 @@
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
/* Probe the root fh to retrieve its FSID */
- error = nfs4_path_walk(server, mntfh, mntpath);
+ error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
if (error < 0)
goto error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c55a761..af8b235 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -52,7 +52,7 @@
for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
- if ((struct nfs_open_context *)fl->fl_file->private_data != ctx)
+ if (nfs_file_open_context(fl->fl_file) != ctx)
continue;
status = nfs4_lock_delegation_recall(state, fl);
if (status >= 0)
@@ -109,6 +109,7 @@
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+ struct rpc_cred *oldcred;
if (delegation == NULL)
return;
@@ -116,11 +117,12 @@
sizeof(delegation->stateid.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
- put_rpccred(cred);
+ oldcred = delegation->cred;
delegation->cred = get_rpccred(cred);
delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
NFS_I(inode)->delegation_state = delegation->type;
smp_wmb();
+ put_rpccred(oldcred);
}
/*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e4a04d1..8ec7fbd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -200,9 +200,6 @@
desc->timestamp = timestamp;
desc->timestamp_valid = 1;
SetPageUptodate(page);
- spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
- spin_unlock(&inode->i_lock);
/* Ensure consistent page alignment of the data.
* Note: assumes we have exclusive access to this mapping either
* through inode->i_mutex or some other mechanism.
@@ -214,9 +211,7 @@
unlock_page(page);
return 0;
error:
- SetPageError(page);
unlock_page(page);
- nfs_zap_caches(inode);
desc->error = error;
return -EIO;
}
@@ -407,7 +402,7 @@
struct file *file = desc->file;
struct nfs_entry *entry = desc->entry;
struct dentry *dentry = NULL;
- unsigned long fileid;
+ u64 fileid;
int loop_count = 0,
res;
@@ -418,7 +413,7 @@
unsigned d_type = DT_UNKNOWN;
/* Note: entry->prev_cookie contains the cookie for
* retrieving the current dirent on the server */
- fileid = nfs_fileid_to_ino_t(entry->ino);
+ fileid = entry->ino;
/* Get a dentry if we have one */
if (dentry != NULL)
@@ -428,11 +423,12 @@
/* Use readdirplus info */
if (dentry != NULL && dentry->d_inode != NULL) {
d_type = dt_type(dentry->d_inode);
- fileid = dentry->d_inode->i_ino;
+ fileid = NFS_FILEID(dentry->d_inode);
}
res = filldir(dirent, entry->name, entry->len,
- file->f_pos, fileid, d_type);
+ file->f_pos, nfs_compat_user_ino64(fileid),
+ d_type);
if (res < 0)
break;
file->f_pos++;
@@ -490,9 +486,6 @@
page,
NFS_SERVER(inode)->dtsize,
desc->plus);
- spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
- spin_unlock(&inode->i_lock);
desc->page = page;
desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
if (desc->error >= 0) {
@@ -558,7 +551,7 @@
memset(desc, 0, sizeof(*desc));
desc->file = filp;
- desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie;
+ desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
desc->decode = NFS_PROTO(inode)->decode_dirent;
desc->plus = NFS_USE_READDIRPLUS(inode);
@@ -623,7 +616,7 @@
}
if (offset != filp->f_pos) {
filp->f_pos = offset;
- ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
+ nfs_file_open_context(filp)->dir_cookie = 0;
}
out:
mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
@@ -650,36 +643,18 @@
*/
static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
{
- unsigned long verf;
-
if (IS_ROOT(dentry))
return 1;
- verf = dentry->d_time;
- if (nfs_caches_unstable(dir)
- || verf != NFS_I(dir)->cache_change_attribute)
+ if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ return 0;
+ /* Revalidate nfsi->cache_change_attribute before we declare a match */
+ if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+ return 0;
+ if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
return 1;
}
-static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
-{
- dentry->d_time = verf;
-}
-
-static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf)
-{
- nfs_set_verifier(dentry, verf);
-}
-
-/*
- * Whenever an NFS operation succeeds, we know that the dentry
- * is valid, so we update the revalidation timestamp.
- */
-static inline void nfs_renew_times(struct dentry * dentry)
-{
- dentry->d_time = jiffies;
-}
-
/*
* Return the intent data that applies to this particular path component
*
@@ -695,6 +670,19 @@
}
/*
+ * Use intent information to check whether or not we're going to do
+ * an O_EXCL create using this path component.
+ */
+static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+{
+ if (NFS_PROTO(dir)->version == 2)
+ return 0;
+ if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
+ return 0;
+ return (nd->intent.open.flags & O_EXCL) != 0;
+}
+
+/*
* Inode and filehandle revalidation for lookups.
*
* We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
@@ -717,6 +705,7 @@
(S_ISREG(inode->i_mode) ||
S_ISDIR(inode->i_mode)))
goto out_force;
+ return 0;
}
return nfs_revalidate_inode(server, inode);
out_force:
@@ -759,7 +748,6 @@
int error;
struct nfs_fh fhandle;
struct nfs_fattr fattr;
- unsigned long verifier;
parent = dget_parent(dentry);
lock_kernel();
@@ -767,10 +755,6 @@
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
- /* Revalidate parent directory attribute cache */
- if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
- goto out_zap_parent;
-
if (!inode) {
if (nfs_neg_need_reval(dir, dentry, nd))
goto out_bad;
@@ -785,7 +769,7 @@
}
/* Force a full look up iff the parent directory has changed */
- if (nfs_check_verifier(dir, dentry)) {
+ if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
if (nfs_lookup_verify_inode(inode, nd))
goto out_zap_parent;
goto out_valid;
@@ -794,7 +778,6 @@
if (NFS_STALE(inode))
goto out_bad;
- verifier = nfs_save_change_attribute(dir);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error)
goto out_bad;
@@ -803,8 +786,7 @@
if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
goto out_bad;
- nfs_renew_times(dentry);
- nfs_refresh_verifier(dentry, verifier);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_valid:
unlock_kernel();
dput(parent);
@@ -815,7 +797,7 @@
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
- NFS_CACHEINV(dir);
+ nfs_mark_for_revalidate(dir);
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
@@ -872,8 +854,6 @@
nfs_complete_unlink(dentry, inode);
unlock_kernel();
}
- /* When creating a negative dentry, we want to renew d_time */
- nfs_renew_times(dentry);
iput(inode);
}
@@ -883,30 +863,6 @@
.d_iput = nfs_dentry_iput,
};
-/*
- * Use intent information to check whether or not we're going to do
- * an O_EXCL create using this path component.
- */
-static inline
-int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
-{
- if (NFS_PROTO(dir)->version == 2)
- return 0;
- if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
- return 0;
- return (nd->intent.open.flags & O_EXCL) != 0;
-}
-
-static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
-{
- struct nfs_server *server = NFS_SERVER(dir);
-
- if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
- /* Revalidate fsid using the parent directory */
- return __nfs_revalidate_inode(server, dir);
- return 0;
-}
-
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
struct dentry *res;
@@ -945,11 +901,6 @@
res = ERR_PTR(error);
goto out_unlock;
}
- error = nfs_reval_fsid(dir, &fattr);
- if (error < 0) {
- res = ERR_PTR(error);
- goto out_unlock;
- }
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
res = (struct dentry *)inode;
if (IS_ERR(res))
@@ -958,17 +909,10 @@
no_entry:
res = d_materialise_unique(dentry, inode);
if (res != NULL) {
- struct dentry *parent;
if (IS_ERR(res))
goto out_unlock;
- /* Was a directory renamed! */
- parent = dget_parent(res);
- if (!IS_ROOT(parent))
- nfs_mark_for_revalidate(parent->d_inode);
- dput(parent);
dentry = res;
}
- nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_unlock:
unlock_kernel();
@@ -1020,28 +964,16 @@
}
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
- /* Let vfs_create() deal with O_EXCL */
+ /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
+ * the dentry. */
if (nd->intent.open.flags & O_EXCL) {
- d_add(dentry, NULL);
+ d_instantiate(dentry, NULL);
goto out;
}
/* Open the file on the server */
lock_kernel();
- /* Revalidate parent directory attribute cache */
- error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
- if (error < 0) {
- res = ERR_PTR(error);
- unlock_kernel();
- goto out;
- }
-
- if (nd->intent.open.flags & O_CREAT) {
- nfs_begin_data_update(dir);
- res = nfs4_atomic_open(dir, dentry, nd);
- nfs_end_data_update(dir);
- } else
- res = nfs4_atomic_open(dir, dentry, nd);
+ res = nfs4_atomic_open(dir, dentry, nd);
unlock_kernel();
if (IS_ERR(res)) {
error = PTR_ERR(res);
@@ -1063,8 +995,6 @@
}
} else if (res != NULL)
dentry = res;
- nfs_renew_times(dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out:
return res;
no_open:
@@ -1076,7 +1006,6 @@
struct dentry *parent = NULL;
struct inode *inode = dentry->d_inode;
struct inode *dir;
- unsigned long verifier;
int openflags, ret = 0;
parent = dget_parent(dentry);
@@ -1086,8 +1015,12 @@
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
- if (inode == NULL)
+ if (inode == NULL) {
+ if (!nfs_neg_need_reval(dir, dentry, nd))
+ ret = 1;
goto out;
+ }
+
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
goto no_open;
@@ -1104,10 +1037,7 @@
* change attribute *before* we do the RPC call.
*/
lock_kernel();
- verifier = nfs_save_change_attribute(dir);
ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
- if (!ret)
- nfs_refresh_verifier(dentry, verifier);
unlock_kernel();
out:
dput(parent);
@@ -1133,6 +1063,7 @@
.len = entry->len,
};
struct inode *inode;
+ unsigned long verf = nfs_save_change_attribute(dir);
switch (name.len) {
case 2:
@@ -1143,6 +1074,14 @@
if (name.name[0] == '.')
return dget(parent);
}
+
+ spin_lock(&dir->i_lock);
+ if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) {
+ spin_unlock(&dir->i_lock);
+ return NULL;
+ }
+ spin_unlock(&dir->i_lock);
+
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
if (dentry != NULL) {
@@ -1183,12 +1122,8 @@
dentry = alias;
}
- nfs_renew_times(dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- return dentry;
out_renew:
- nfs_renew_times(dentry);
- nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_set_verifier(dentry, verf);
return dentry;
}
@@ -1198,32 +1133,40 @@
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct dentry *parent = dget_parent(dentry);
+ struct inode *dir = parent->d_inode;
struct inode *inode;
int error = -EACCES;
+ d_drop(dentry);
+
/* We may have been initialized further down */
if (dentry->d_inode)
- return 0;
+ goto out;
if (fhandle->size == 0) {
- struct inode *dir = dentry->d_parent->d_inode;
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error)
- return error;
+ goto out_error;
}
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
if (!(fattr->valid & NFS_ATTR_FATTR)) {
struct nfs_server *server = NFS_SB(dentry->d_sb);
error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
if (error < 0)
- return error;
+ goto out_error;
}
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
error = PTR_ERR(inode);
if (IS_ERR(inode))
- return error;
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry))
- d_rehash(dentry);
+ goto out_error;
+ d_add(dentry, inode);
+out:
+ dput(parent);
return 0;
+out_error:
+ nfs_mark_for_revalidate(dir);
+ dput(parent);
+ return error;
}
/*
@@ -1249,13 +1192,9 @@
open_flags = nd->intent.open.flags;
lock_kernel();
- nfs_begin_data_update(dir);
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
- nfs_end_data_update(dir);
if (error != 0)
goto out_err;
- nfs_renew_times(dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
unlock_kernel();
return 0;
out_err:
@@ -1283,13 +1222,9 @@
attr.ia_valid = ATTR_MODE;
lock_kernel();
- nfs_begin_data_update(dir);
status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
- nfs_end_data_update(dir);
if (status != 0)
goto out_err;
- nfs_renew_times(dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
unlock_kernel();
return 0;
out_err:
@@ -1313,13 +1248,9 @@
attr.ia_mode = mode | S_IFDIR;
lock_kernel();
- nfs_begin_data_update(dir);
error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
- nfs_end_data_update(dir);
if (error != 0)
goto out_err;
- nfs_renew_times(dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
unlock_kernel();
return 0;
out_err:
@@ -1336,12 +1267,10 @@
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
lock_kernel();
- nfs_begin_data_update(dir);
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
/* Ensure the VFS deletes this inode */
if (error == 0 && dentry->d_inode != NULL)
clear_nlink(dentry->d_inode);
- nfs_end_data_update(dir);
unlock_kernel();
return error;
@@ -1350,9 +1279,9 @@
static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
{
static unsigned int sillycounter;
- const int i_inosize = sizeof(dir->i_ino)*2;
+ const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
const int countersize = sizeof(sillycounter)*2;
- const int slen = sizeof(".nfs") + i_inosize + countersize - 1;
+ const int slen = sizeof(".nfs")+fileidsize+countersize-1;
char silly[slen+1];
struct qstr qsilly;
struct dentry *sdentry;
@@ -1370,8 +1299,9 @@
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out;
- sprintf(silly, ".nfs%*.*lx",
- i_inosize, i_inosize, dentry->d_inode->i_ino);
+ sprintf(silly, ".nfs%*.*Lx",
+ fileidsize, fileidsize,
+ (unsigned long long)NFS_FILEID(dentry->d_inode));
/* Return delegation in anticipation of the rename */
nfs_inode_return_delegation(dentry->d_inode);
@@ -1398,19 +1328,14 @@
qsilly.name = silly;
qsilly.len = strlen(silly);
- nfs_begin_data_update(dir);
if (dentry->d_inode) {
- nfs_begin_data_update(dentry->d_inode);
error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
dir, &qsilly);
nfs_mark_for_revalidate(dentry->d_inode);
- nfs_end_data_update(dentry->d_inode);
} else
error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
dir, &qsilly);
- nfs_end_data_update(dir);
if (!error) {
- nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
d_move(dentry, sdentry);
error = nfs_async_unlink(dir, dentry);
@@ -1443,19 +1368,15 @@
goto out;
}
- nfs_begin_data_update(dir);
if (inode != NULL) {
nfs_inode_return_delegation(inode);
- nfs_begin_data_update(inode);
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
/* The VFS may want to delete this inode */
if (error == 0)
drop_nlink(inode);
nfs_mark_for_revalidate(inode);
- nfs_end_data_update(inode);
} else
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
- nfs_end_data_update(dir);
out:
return error;
}
@@ -1493,7 +1414,6 @@
spin_unlock(&dcache_lock);
error = nfs_safe_remove(dentry);
if (!error) {
- nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
} else if (need_rehash)
d_rehash(dentry);
@@ -1548,9 +1468,7 @@
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
kunmap_atomic(kaddr, KM_USER0);
- nfs_begin_data_update(dir);
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
- nfs_end_data_update(dir);
if (error != 0) {
dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
@@ -1590,15 +1508,12 @@
dentry->d_parent->d_name.name, dentry->d_name.name);
lock_kernel();
- nfs_begin_data_update(dir);
- nfs_begin_data_update(inode);
+ d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
atomic_inc(&inode->i_count);
- d_instantiate(dentry, inode);
+ d_add(dentry, inode);
}
- nfs_end_data_update(inode);
- nfs_end_data_update(dir);
unlock_kernel();
return error;
}
@@ -1701,22 +1616,16 @@
d_delete(new_dentry);
}
- nfs_begin_data_update(old_dir);
- nfs_begin_data_update(new_dir);
- nfs_begin_data_update(old_inode);
error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
new_dir, &new_dentry->d_name);
nfs_mark_for_revalidate(old_inode);
- nfs_end_data_update(old_inode);
- nfs_end_data_update(new_dir);
- nfs_end_data_update(old_dir);
out:
if (rehash)
d_rehash(rehash);
if (!error) {
d_move(old_dentry, new_dentry);
- nfs_renew_times(new_dentry);
- nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir));
+ nfs_set_verifier(new_dentry,
+ nfs_save_change_attribute(new_dir));
}
/* new dentry created? */
@@ -1842,7 +1751,7 @@
return NULL;
}
-int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -1854,7 +1763,7 @@
cache = nfs_access_search_rbtree(inode, cred);
if (cache == NULL)
goto out;
- if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+ if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
goto out_stale;
res->jiffies = cache->jiffies;
res->cred = cache->cred;
@@ -1909,7 +1818,7 @@
nfs_access_free_entry(entry);
}
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL)
@@ -1957,6 +1866,24 @@
return -EACCES;
}
+static int nfs_open_permission_mask(int openflags)
+{
+ int mask = 0;
+
+ if (openflags & FMODE_READ)
+ mask |= MAY_READ;
+ if (openflags & FMODE_WRITE)
+ mask |= MAY_WRITE;
+ if (openflags & FMODE_EXEC)
+ mask |= MAY_EXEC;
+ return mask;
+}
+
+int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
+{
+ return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
+}
+
int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
{
struct rpc_cred *cred;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fcf4d38..32fe972 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -368,7 +368,7 @@
return -ENOMEM;
dreq->inode = inode;
- dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
@@ -510,7 +510,6 @@
nfs_direct_write_reschedule(dreq);
break;
default:
- nfs_end_data_update(inode);
if (dreq->commit_data != NULL)
nfs_commit_free(dreq->commit_data);
nfs_direct_free_writedata(dreq);
@@ -533,7 +532,6 @@
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_end_data_update(inode);
nfs_direct_free_writedata(dreq);
nfs_zap_mapping(inode, inode->i_mapping);
nfs_direct_complete(dreq);
@@ -718,14 +716,12 @@
sync = FLUSH_STABLE;
dreq->inode = inode;
- dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
- nfs_begin_data_update(inode);
-
rpc_clnt_sigmask(clnt, &oldset);
result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
if (!result)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 579cf8a..c664bb9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -33,6 +33,7 @@
#include <asm/system.h>
#include "delegation.h"
+#include "internal.h"
#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -55,6 +56,8 @@
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
+static struct vm_operations_struct nfs_file_vm_ops;
+
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
.read = do_sync_read,
@@ -174,13 +177,38 @@
}
/*
+ * Helper for nfs_file_flush() and nfs_fsync()
+ *
+ * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
+ * disk, but it retrieves and clears ctx->error after synching, despite
+ * the two being set at the same time in nfs_context_set_write_error().
+ * This is because the former is used to notify the _next_ call to
+ * nfs_file_write() that a write error occured, and hence cause it to
+ * fall back to doing a synchronous write.
+ */
+static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
+{
+ int have_error, status;
+ int ret = 0;
+
+ have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ status = nfs_wb_all(inode);
+ have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ if (have_error)
+ ret = xchg(&ctx->error, 0);
+ if (!ret)
+ ret = status;
+ return ret;
+}
+
+/*
* Flush all dirty pages, and check for write errors.
*
*/
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
- struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file->f_path.dentry->d_inode;
int status;
@@ -189,16 +217,11 @@
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
- lock_kernel();
+
/* Ensure that data+attribute caches are up to date after close() */
- status = nfs_wb_all(inode);
- if (!status) {
- status = ctx->error;
- ctx->error = 0;
- if (!status)
- nfs_revalidate_inode(NFS_SERVER(inode), inode);
- }
- unlock_kernel();
+ status = nfs_do_fsync(ctx, inode);
+ if (!status)
+ nfs_revalidate_inode(NFS_SERVER(inode), inode);
return status;
}
@@ -257,8 +280,11 @@
dentry->d_parent->d_name.name, dentry->d_name.name);
status = nfs_revalidate_mapping(inode, file->f_mapping);
- if (!status)
- status = generic_file_mmap(file, vma);
+ if (!status) {
+ vma->vm_ops = &nfs_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+ file_accessed(file);
+ }
return status;
}
@@ -270,21 +296,13 @@
static int
nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
- struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = dentry->d_inode;
- int status;
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- lock_kernel();
- status = nfs_wb_all(inode);
- if (!status) {
- status = ctx->error;
- ctx->error = 0;
- }
- unlock_kernel();
- return status;
+ return nfs_do_fsync(ctx, inode);
}
/*
@@ -333,7 +351,7 @@
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
- .set_page_dirty = nfs_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.writepage = nfs_writepage,
.writepages = nfs_writepages,
.prepare_write = nfs_prepare_write,
@@ -346,6 +364,43 @@
.launder_page = nfs_launder_page,
};
+static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ struct file *filp = vma->vm_file;
+ unsigned pagelen;
+ int ret = -EINVAL;
+
+ lock_page(page);
+ if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
+ goto out_unlock;
+ pagelen = nfs_page_length(page);
+ if (pagelen == 0)
+ goto out_unlock;
+ ret = nfs_prepare_write(filp, page, 0, pagelen);
+ if (!ret)
+ ret = nfs_commit_write(filp, page, 0, pagelen);
+out_unlock:
+ unlock_page(page);
+ return ret;
+}
+
+static struct vm_operations_struct nfs_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = nfs_vm_page_mkwrite,
+};
+
+static int nfs_need_sync_write(struct file *filp, struct inode *inode)
+{
+ struct nfs_open_context *ctx;
+
+ if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
+ return 1;
+ ctx = nfs_file_open_context(filp);
+ if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
+ return 1;
+ return 0;
+}
+
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -382,8 +437,8 @@
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
result = generic_file_aio_write(iocb, iov, nr_segs, pos);
/* Return error values for O_SYNC and IS_SYNC() */
- if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
- int err = nfs_fsync(iocb->ki_filp, dentry, 1);
+ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
+ int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
if (err < 0)
result = err;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 71a49c3..035c769 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -49,6 +49,11 @@
#define NFSDBG_FACILITY NFSDBG_VFS
+#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1
+
+/* Default is to see 64-bit inode numbers */
+static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
+
static void nfs_invalidate_inode(struct inode *);
static int nfs_update_inode(struct inode *, struct nfs_fattr *);
@@ -62,6 +67,25 @@
return nfs_fileid_to_ino_t(fattr->fileid);
}
+/**
+ * nfs_compat_user_ino64 - returns the user-visible inode number
+ * @fileid: 64-bit fileid
+ *
+ * This function returns a 32-bit inode number if the boot parameter
+ * nfs.enable_ino64 is zero.
+ */
+u64 nfs_compat_user_ino64(u64 fileid)
+{
+ int ino;
+
+ if (enable_ino64)
+ return fileid;
+ ino = fileid;
+ if (sizeof(ino) < sizeof(fileid))
+ ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8;
+ return ino;
+}
+
int nfs_write_inode(struct inode *inode, int sync)
{
int ret;
@@ -85,7 +109,6 @@
*/
BUG_ON(nfs_have_writebacks(inode));
BUG_ON(!list_empty(&NFS_I(inode)->open_files));
- BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
nfs_zap_acl_cache(inode);
nfs_access_zap_cache(inode);
}
@@ -118,8 +141,8 @@
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
- NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
- NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
+ nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+ nfsi->attrtimeo_timestamp = jiffies;
memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
@@ -156,6 +179,13 @@
spin_unlock(&inode->i_lock);
}
+void nfs_invalidate_atime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+ spin_unlock(&inode->i_lock);
+}
+
/*
* Invalidate, but do not unhash, the inode.
* NB: must be called with inode->i_lock held!
@@ -338,7 +368,6 @@
return 0;
lock_kernel();
- nfs_begin_data_update(inode);
/* Write all dirty data */
if (S_ISREG(inode->i_mode)) {
filemap_write_and_wait(inode->i_mapping);
@@ -352,7 +381,6 @@
error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
if (error == 0)
nfs_refresh_inode(inode, &fattr);
- nfs_end_data_update(inode);
unlock_kernel();
return error;
}
@@ -431,7 +459,7 @@
/* Flush out writes to the server in order to update c/mtime */
if (S_ISREG(inode->i_mode))
- nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
+ nfs_wb_nocommit(inode);
/*
* We may force a getattr if the user cares about atime.
@@ -450,8 +478,10 @@
err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
else
err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (!err)
+ if (!err) {
generic_fillattr(inode, stat);
+ stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ }
return err;
}
@@ -536,7 +566,7 @@
static void nfs_file_clear_open_context(struct file *filp)
{
struct inode *inode = filp->f_path.dentry->d_inode;
- struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
+ struct nfs_open_context *ctx = nfs_file_open_context(filp);
if (ctx) {
filp->private_data = NULL;
@@ -598,16 +628,10 @@
status = nfs_wait_on_inode(inode);
if (status < 0)
goto out;
- if (NFS_STALE(inode)) {
- status = -ESTALE;
- /* Do we trust the cached ESTALE? */
- if (NFS_ATTRTIMEO(inode) != 0) {
- if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) {
- /* no */
- } else
- goto out;
- }
- }
+
+ status = -ESTALE;
+ if (NFS_STALE(inode))
+ goto out;
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
if (status != 0) {
@@ -654,7 +678,7 @@
if (nfs_have_delegation(inode, FMODE_READ))
return 0;
- return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
+ return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
}
/**
@@ -683,11 +707,8 @@
}
spin_lock(&inode->i_lock);
nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode)) {
+ if (S_ISDIR(inode->i_mode))
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- /* This ensures we revalidate child dentries */
- nfsi->cache_change_attribute = jiffies;
- }
spin_unlock(&inode->i_lock);
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
@@ -756,56 +777,27 @@
return ret;
}
-/**
- * nfs_begin_data_update
- * @inode - pointer to inode
- * Declare that a set of operations will update file data on the server
- */
-void nfs_begin_data_update(struct inode *inode)
-{
- atomic_inc(&NFS_I(inode)->data_updates);
-}
-
-/**
- * nfs_end_data_update
- * @inode - pointer to inode
- * Declare end of the operations that will update file data
- * This will mark the inode as immediately needing revalidation
- * of its attribute cache.
- */
-void nfs_end_data_update(struct inode *inode)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
-
- /* Directories: invalidate page cache */
- if (S_ISDIR(inode->i_mode)) {
- spin_lock(&inode->i_lock);
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
- spin_unlock(&inode->i_lock);
- }
- nfsi->cache_change_attribute = jiffies;
- atomic_dec(&nfsi->data_updates);
-}
-
static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
- unsigned long now = jiffies;
+ if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 &&
+ nfsi->change_attr == fattr->pre_change_attr) {
+ nfsi->change_attr = fattr->change_attr;
+ if (S_ISDIR(inode->i_mode))
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ }
/* If we have atomic WCC data, we may update some attributes */
if ((fattr->valid & NFS_ATTR_WCC) != 0) {
- if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
+ if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
- nfsi->cache_change_attribute = now;
- }
if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
- nfsi->cache_change_attribute = now;
+ if (S_ISDIR(inode->i_mode))
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
}
- if (inode->i_size == fattr->pre_size && nfsi->npages == 0) {
+ if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
inode->i_size = fattr->size;
- nfsi->cache_change_attribute = now;
- }
}
}
@@ -822,7 +814,7 @@
{
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_size, new_isize;
- int data_unstable;
+ unsigned long invalid = 0;
/* Has the inode gone and changed behind our back? */
@@ -831,37 +823,41 @@
return -EIO;
}
- /* Are we in the process of updating data on the server? */
- data_unstable = nfs_caches_unstable(inode);
-
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
nfsi->change_attr != fattr->change_attr)
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Verify a few of the more important attributes */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize && nfsi->npages == 0)
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Have any file permissions changed? */
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
|| inode->i_uid != fattr->uid
|| inode->i_gid != fattr->gid)
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+ invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
/* Has the link count changed? */
if (inode->i_nlink != fattr->nlink)
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ invalid |= NFS_INO_INVALID_ATTR;
if (!timespec_equal(&inode->i_atime, &fattr->atime))
- nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
+ invalid |= NFS_INO_INVALID_ATIME;
+
+ if (invalid != 0)
+ nfsi->cache_validity |= invalid;
+ else
+ nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ATIME
+ | NFS_INO_REVAL_PAGECACHE);
nfsi->read_cache_jiffies = fattr->time_start;
return 0;
@@ -911,17 +907,41 @@
int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
- int status = 0;
spin_lock(&inode->i_lock);
- if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
- nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- goto out;
- }
- status = nfs_update_inode(inode, fattr);
-out:
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ if (S_ISDIR(inode->i_mode))
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
spin_unlock(&inode->i_lock);
- return status;
+ return nfs_refresh_inode(inode, fattr);
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+ if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
+ (fattr->valid & NFS_ATTR_WCC_V4) == 0) {
+ fattr->pre_change_attr = NFS_I(inode)->change_attr;
+ fattr->valid |= NFS_ATTR_WCC_V4;
+ }
+ if ((fattr->valid & NFS_ATTR_FATTR) != 0 &&
+ (fattr->valid & NFS_ATTR_WCC) == 0) {
+ memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
+ memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
+ fattr->pre_size = inode->i_size;
+ fattr->valid |= NFS_ATTR_WCC;
+ }
+ return nfs_post_op_update_inode(inode, fattr);
}
/*
@@ -941,9 +961,8 @@
struct nfs_server *server;
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_isize, new_isize;
- unsigned int invalid = 0;
+ unsigned long invalid = 0;
unsigned long now = jiffies;
- int data_stable;
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
__FUNCTION__, inode->i_sb->s_id, inode->i_ino,
@@ -968,57 +987,51 @@
* Update the read time so we don't revalidate too often.
*/
nfsi->read_cache_jiffies = fattr->time_start;
- nfsi->last_updated = now;
- /* Fix a wraparound issue with nfsi->cache_change_attribute */
- if (time_before(now, nfsi->cache_change_attribute))
- nfsi->cache_change_attribute = now - 600*HZ;
-
- /* Are we racing with known updates of the metadata on the server? */
- data_stable = nfs_verify_change_attribute(inode, fattr->time_start);
- if (data_stable)
- nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME);
+ nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
+ | NFS_INO_REVAL_PAGECACHE);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
+ /* More cache consistency checks */
+ if (!(fattr->valid & NFS_ATTR_FATTR_V4)) {
+ /* NFSv2/v3: Check if the mtime agrees */
+ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
+ dprintk("NFS: mtime change on server for file %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+ nfsi->cache_change_attribute = now;
+ }
+ /* If ctime has changed we should definitely clear access+acl caches */
+ if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
+ invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ } else if (nfsi->change_attr != fattr->change_attr) {
+ dprintk("NFS: change_attr change on server for file %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfsi->cache_change_attribute = now;
+ }
+
/* Check if our cached file size is stale */
new_isize = nfs_size_to_loff_t(fattr->size);
cur_isize = i_size_read(inode);
if (new_isize != cur_isize) {
- /* Do we perhaps have any outstanding writes? */
- if (nfsi->npages == 0) {
- /* No, but did we race with nfs_end_data_update()? */
- if (data_stable) {
- inode->i_size = new_isize;
- invalid |= NFS_INO_INVALID_DATA;
- }
- invalid |= NFS_INO_INVALID_ATTR;
- } else if (new_isize > cur_isize) {
+ /* Do we perhaps have any outstanding writes, or has
+ * the file grown beyond our last write? */
+ if (nfsi->npages == 0 || new_isize > cur_isize) {
inode->i_size = new_isize;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
}
- nfsi->cache_change_attribute = now;
dprintk("NFS: isize change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
}
- /* Check if the mtime agrees */
- if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
- memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
- dprintk("NFS: mtime change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- nfsi->cache_change_attribute = now;
- }
- /* If ctime has changed we should definitely clear access+acl caches */
- if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
- invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
- nfsi->cache_change_attribute = now;
- }
+ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+ nfsi->change_attr = fattr->change_attr;
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
inode->i_uid != fattr->uid ||
@@ -1039,31 +1052,29 @@
inode->i_blocks = fattr->du.nfs2.blocks;
}
- if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
- nfsi->change_attr != fattr->change_attr) {
- dprintk("NFS: change_attr change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- nfsi->change_attr = fattr->change_attr;
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = now;
- }
-
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
- } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
- if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
- nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
- nfsi->attrtimeo_timestamp = now;
+ nfsi->last_updated = now;
+ } else {
+ if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+ if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
+ nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+ nfsi->attrtimeo_timestamp = now;
+ }
+ /*
+ * Avoid jiffy wraparound issues with nfsi->last_updated
+ */
+ if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now))
+ nfsi->last_updated = nfsi->read_cache_jiffies;
}
+ invalid &= ~NFS_INO_INVALID_ATTR;
/* Don't invalidate the data if we were to blame */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode)))
invalid &= ~NFS_INO_INVALID_DATA;
- if (data_stable)
- invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
if (!nfs_have_delegation(inode, FMODE_READ) ||
(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
nfsi->cache_validity |= invalid;
@@ -1152,7 +1163,6 @@
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
- atomic_set(&nfsi->data_updates, 0);
nfsi->ncommit = 0;
nfsi->npages = 0;
nfs4_init_once(nfsi);
@@ -1249,6 +1259,7 @@
/* Not quite true; I just maintain it */
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
MODULE_LICENSE("GPL");
+module_param(enable_ino64, bool, 0644);
module_init(init_nfs_fs)
module_exit(exit_nfs_fs)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 76cf55d..f3acf48 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,8 +5,6 @@
#include <linux/mount.h>
struct nfs_string;
-struct nfs_mount_data;
-struct nfs4_mount_data;
/* Maximum number of readahead requests
* FIXME: this should really be a sysctl so that users may tune it to suit
@@ -27,20 +25,50 @@
rpc_authflavor_t authflavor;
};
+/*
+ * In-kernel mount arguments
+ */
+struct nfs_parsed_mount_data {
+ int flags;
+ int rsize, wsize;
+ int timeo, retrans;
+ int acregmin, acregmax,
+ acdirmin, acdirmax;
+ int namlen;
+ unsigned int bsize;
+ unsigned int auth_flavor_len;
+ rpc_authflavor_t auth_flavors[1];
+ char *client_address;
+
+ struct {
+ struct sockaddr_in address;
+ char *hostname;
+ unsigned int program;
+ unsigned int version;
+ unsigned short port;
+ int protocol;
+ } mount_server;
+
+ struct {
+ struct sockaddr_in address;
+ char *hostname;
+ char *export_path;
+ unsigned int program;
+ int protocol;
+ } nfs_server;
+};
+
/* client.c */
extern struct rpc_program nfs_program;
extern void nfs_put_client(struct nfs_client *);
extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
-extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
- struct nfs_fh *);
-extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
- const char *,
- const struct sockaddr_in *,
- const char *,
- const char *,
- rpc_authflavor_t,
- struct nfs_fh *);
+extern struct nfs_server *nfs_create_server(
+ const struct nfs_parsed_mount_data *,
+ struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(
+ const struct nfs_parsed_mount_data *,
+ struct nfs_fh *);
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
extern void nfs_free_server(struct nfs_server *server);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c5fce75..668ab96 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -251,6 +251,7 @@
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
xdr_inline_pages(&req->rq_rcv_buf, replen,
args->pages, args->pgbase, count);
+ req->rq_rcv_buf.flags |= XDRBUF_READ;
return 0;
}
@@ -271,7 +272,7 @@
res->eof = 0;
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
- printk(KERN_WARNING "NFS: READ reply header overflowed:"
+ dprintk("NFS: READ reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
@@ -281,7 +282,7 @@
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
- printk(KERN_WARNING "NFS: server cheating in read reply: "
+ dprintk("NFS: server cheating in read reply: "
"count %d > recvd %d\n", count, recvd);
count = recvd;
}
@@ -313,6 +314,7 @@
/* Copy the page array */
xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+ sndbuf->flags |= XDRBUF_WRITE;
return 0;
}
@@ -431,7 +433,7 @@
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
- printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+ dprintk("NFS: READDIR reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
@@ -454,7 +456,7 @@
len = ntohl(*p++);
p += XDR_QUADLEN(len) + 1; /* name plus cookie */
if (len > NFS2_MAXNAMLEN) {
- printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
+ dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
len);
goto err_unmap;
}
@@ -471,7 +473,7 @@
entry[0] = entry[1] = 0;
/* truncate listing ? */
if (!nr) {
- printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+ dprintk("NFS: readdir reply truncated!\n");
entry[1] = 1;
}
goto out;
@@ -583,12 +585,12 @@
/* Convert length of symlink */
len = ntohl(*p++);
if (len >= rcvbuf->page_len || len <= 0) {
- dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+ dprintk("nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
}
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
- printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+ dprintk("NFS: READLINK reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
@@ -597,7 +599,7 @@
}
recvd = req->rq_rcv_buf.len - hdrlen;
if (recvd < len) {
- printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+ dprintk("NFS: server cheating in readlink reply: "
"count %u > recvd %u\n", len, recvd);
return -EIO;
}
@@ -695,7 +697,7 @@
if (nfs_errtbl[i].stat == stat)
return nfs_errtbl[i].errno;
}
- printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+ dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
return nfs_errtbl[i].errno;
}
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7322da4..9b73625 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -317,13 +317,11 @@
}
dprintk("NFS call setacl\n");
- nfs_begin_data_update(inode);
msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
status = rpc_call_sync(server->client_acl, &msg, 0);
spin_lock(&inode->i_lock);
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
spin_unlock(&inode->i_lock);
- nfs_end_data_update(inode);
dprintk("NFS reply setacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c7ca5d7..4cdc236 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -166,6 +166,7 @@
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nfs_refresh_inode(dir, &dir_attr);
if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_argp = fhandle;
@@ -173,8 +174,6 @@
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
dprintk("NFS reply lookup: %d\n", status);
- if (status >= 0)
- status = nfs_refresh_inode(dir, &dir_attr);
return status;
}
@@ -607,6 +606,9 @@
nfs_fattr_init(&dir_attr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+ nfs_invalidate_atime(dir);
+
nfs_refresh_inode(dir, &dir_attr);
dprintk("NFS reply readdir: %d\n", status);
return status;
@@ -724,9 +726,9 @@
{
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
- /* Call back common NFS readpage processing */
- if (task->tk_status >= 0)
- nfs_refresh_inode(data->inode, &data->fattr);
+
+ nfs_invalidate_atime(data->inode);
+ nfs_refresh_inode(data->inode, &data->fattr);
return 0;
}
@@ -747,7 +749,7 @@
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
return 0;
}
@@ -775,8 +777,7 @@
{
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
- if (task->tk_status >= 0)
- nfs_post_op_update_inode(data->inode, data->res.fattr);
+ nfs_refresh_inode(data->inode, data->res.fattr);
return 0;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9e08f0..616d326 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -346,6 +346,7 @@
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
xdr_inline_pages(&req->rq_rcv_buf, replen,
args->pages, args->pgbase, count);
+ req->rq_rcv_buf.flags |= XDRBUF_READ;
return 0;
}
@@ -367,6 +368,7 @@
/* Copy the page array */
xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+ sndbuf->flags |= XDRBUF_WRITE;
return 0;
}
@@ -524,7 +526,7 @@
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
- printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+ dprintk("NFS: READDIR reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
@@ -547,7 +549,7 @@
len = ntohl(*p++); /* string length */
p += XDR_QUADLEN(len) + 2; /* name + cookie */
if (len > NFS3_MAXNAMLEN) {
- printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
+ dprintk("NFS: giant filename in readdir (len %x)!\n",
len);
goto err_unmap;
}
@@ -567,7 +569,7 @@
goto short_pkt;
len = ntohl(*p++);
if (len > NFS3_FHSIZE) {
- printk(KERN_WARNING "NFS: giant filehandle in "
+ dprintk("NFS: giant filehandle in "
"readdir (len %x)!\n", len);
goto err_unmap;
}
@@ -588,7 +590,7 @@
entry[0] = entry[1] = 0;
/* truncate listing ? */
if (!nr) {
- printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+ dprintk("NFS: readdir reply truncated!\n");
entry[1] = 1;
}
goto out;
@@ -826,22 +828,23 @@
/* Convert length of symlink */
len = ntohl(*p++);
if (len >= rcvbuf->page_len || len <= 0) {
- dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+ dprintk("nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
}
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
- printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+ dprintk("NFS: READLINK reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+ dprintk("NFS: READLINK header is short. "
+ "iovec will be shifted.\n");
xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
}
recvd = req->rq_rcv_buf.len - hdrlen;
if (recvd < len) {
- printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+ dprintk("NFS: server cheating in readlink reply: "
"count %u > recvd %u\n", len, recvd);
return -EIO;
}
@@ -876,13 +879,13 @@
ocount = ntohl(*p++);
if (ocount != count) {
- printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
+ dprintk("NFS: READ count doesn't match RPC opaque count.\n");
return -errno_NFSERR_IO;
}
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
- printk(KERN_WARNING "NFS: READ reply header overflowed:"
+ dprintk("NFS: READ reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
@@ -892,7 +895,7 @@
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
- printk(KERN_WARNING "NFS: server cheating in read reply: "
+ dprintk("NFS: server cheating in read reply: "
"count %d > recvd %d\n", count, recvd);
count = recvd;
res->eof = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b90e17..cb99fd9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -62,10 +62,8 @@
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
-static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
-static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
@@ -177,7 +175,7 @@
*p++ = xdr_one; /* bitmap length */
*p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
*p++ = htonl(8); /* attribute buffer length */
- p = xdr_encode_hyper(p, dentry->d_inode->i_ino);
+ p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
}
*p++ = xdr_one; /* next */
@@ -189,7 +187,7 @@
*p++ = xdr_one; /* bitmap length */
*p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
*p++ = htonl(8); /* attribute buffer length */
- p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino);
+ p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
readdir->pgbase = (char *)p - (char *)start;
readdir->count -= readdir->pgbase;
@@ -211,8 +209,9 @@
spin_lock(&dir->i_lock);
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
- if (cinfo->before == nfsi->change_attr && cinfo->atomic)
- nfsi->change_attr = cinfo->after;
+ if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
+ nfsi->cache_change_attribute = jiffies;
+ nfsi->change_attr = cinfo->after;
spin_unlock(&dir->i_lock);
}
@@ -454,7 +453,7 @@
memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
rcu_read_unlock();
lock_kernel();
- ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+ ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
unlock_kernel();
if (ret != 0)
goto out;
@@ -948,36 +947,6 @@
return 0;
}
-static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
-{
- struct nfs_access_entry cache;
- int mask = 0;
- int status;
-
- if (openflags & FMODE_READ)
- mask |= MAY_READ;
- if (openflags & FMODE_WRITE)
- mask |= MAY_WRITE;
- if (openflags & FMODE_EXEC)
- mask |= MAY_EXEC;
- status = nfs_access_get_cached(inode, cred, &cache);
- if (status == 0)
- goto out;
-
- /* Be clever: ask server to check for all possible rights */
- cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
- cache.cred = cred;
- cache.jiffies = jiffies;
- status = _nfs4_proc_access(inode, &cache);
- if (status != 0)
- return status;
- nfs_access_add_cache(inode, &cache);
-out:
- if ((cache.mask & mask) == mask)
- return 0;
- return -EACCES;
-}
-
static int nfs4_recover_expired_lease(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
@@ -1381,7 +1350,7 @@
/* If the open_intent is for execute, we have an extra check to make */
if (nd->intent.open.flags & FMODE_EXEC) {
- ret = _nfs4_do_access(state->inode,
+ ret = nfs_may_open(state->inode,
state->owner->so_cred,
nd->intent.open.flags);
if (ret < 0)
@@ -1390,7 +1359,7 @@
filp = lookup_instantiate_filp(nd, path->dentry, NULL);
if (!IS_ERR(filp)) {
struct nfs_open_context *ctx;
- ctx = (struct nfs_open_context *)filp->private_data;
+ ctx = nfs_file_open_context(filp);
ctx->state = state;
return 0;
}
@@ -1428,13 +1397,16 @@
state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
- if (PTR_ERR(state) == -ENOENT)
+ if (PTR_ERR(state) == -ENOENT) {
d_add(dentry, NULL);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ }
return (struct dentry *)state;
}
res = d_add_unique(dentry, igrab(state->inode));
if (res != NULL)
path.dentry = res;
+ nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
nfs4_intent_set_file(nd, &path, state);
return res;
}
@@ -1468,6 +1440,7 @@
}
}
if (state->inode == dentry->d_inode) {
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
nfs4_intent_set_file(nd, &path, state);
return 1;
}
@@ -1757,10 +1730,16 @@
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_fattr fattr;
struct nfs4_accessargs args = {
.fh = NFS_FH(inode),
+ .bitmask = server->attr_bitmask,
};
- struct nfs4_accessres res = { 0 };
+ struct nfs4_accessres res = {
+ .server = server,
+ .fattr = &fattr,
+ };
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
.rpc_argp = &args,
@@ -1786,6 +1765,7 @@
if (mode & MAY_EXEC)
args.access |= NFS4_ACCESS_EXECUTE;
}
+ nfs_fattr_init(&fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (!status) {
entry->mask = 0;
@@ -1795,6 +1775,7 @@
entry->mask |= MAY_WRITE;
if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
entry->mask |= MAY_EXEC;
+ nfs_refresh_inode(inode, &fattr);
}
return status;
}
@@ -1900,11 +1881,13 @@
}
state = nfs4_do_open(dir, &path, flags, sattr, cred);
put_rpccred(cred);
+ d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
goto out;
}
- d_instantiate(dentry, igrab(state->inode));
+ d_add(dentry, igrab(state->inode));
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
if (flags & O_EXCL) {
struct nfs_fattr fattr;
status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
@@ -2218,6 +2201,9 @@
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
if (status == 0)
memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+
+ nfs_invalidate_atime(dir);
+
dprintk("%s: returns %d\n", __FUNCTION__, status);
return status;
}
@@ -2414,6 +2400,8 @@
rpc_restart_call(task);
return -EAGAIN;
}
+
+ nfs_invalidate_atime(data->inode);
if (task->tk_status > 0)
renew_lease(server, data->timestamp);
return 0;
@@ -2443,7 +2431,7 @@
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
- nfs_post_op_update_inode(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
}
return 0;
}
@@ -2485,8 +2473,7 @@
rpc_restart_call(task);
return -EAGAIN;
}
- if (task->tk_status >= 0)
- nfs_post_op_update_inode(inode, data->res.fattr);
+ nfs_refresh_inode(inode, data->res.fattr);
return 0;
}
@@ -3056,7 +3043,7 @@
if (status == 0) {
status = data->rpc_status;
if (status == 0)
- nfs_post_op_update_inode(inode, &data->fattr);
+ nfs_refresh_inode(inode, &data->fattr);
}
rpc_put_task(task);
return status;
@@ -3303,7 +3290,7 @@
status = -ENOMEM;
if (seqid == NULL)
goto out;
- task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid);
+ task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
status = PTR_ERR(task);
if (IS_ERR(task))
goto out;
@@ -3447,7 +3434,7 @@
int ret;
dprintk("%s: begin!\n", __FUNCTION__);
- data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data,
+ data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
fl->fl_u.nfs4_fl.owner);
if (data == NULL)
return -ENOMEM;
@@ -3573,7 +3560,7 @@
int status;
/* verify open state */
- ctx = (struct nfs_open_context *)filp->private_data;
+ ctx = nfs_file_open_context(filp);
state = ctx->state;
if (request->fl_start < 0 || request->fl_end < 0)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3e4adf8..bfb3626 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -774,7 +774,7 @@
for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
- if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
+ if (nfs_file_open_context(fl->fl_file)->state != state)
continue;
status = ops->recover_lock(state, fl);
if (status >= 0)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index badd73b..51dd380 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -376,10 +376,12 @@
decode_locku_maxsz)
#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
- encode_access_maxsz)
+ encode_access_maxsz + \
+ encode_getattr_maxsz)
#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
- decode_access_maxsz)
+ decode_access_maxsz + \
+ decode_getattr_maxsz)
#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_getattr_maxsz)
@@ -562,7 +564,6 @@
#define RESERVE_SPACE(nbytes) do { \
p = xdr_reserve_space(xdr, nbytes); \
- if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
BUG_ON(!p); \
} while (0)
@@ -628,8 +629,8 @@
if (iap->ia_valid & ATTR_UID) {
owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
if (owner_namelen < 0) {
- printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
- iap->ia_uid);
+ dprintk("nfs: couldn't resolve uid %d to string\n",
+ iap->ia_uid);
/* XXX */
strcpy(owner_name, "nobody");
owner_namelen = sizeof("nobody") - 1;
@@ -640,8 +641,8 @@
if (iap->ia_valid & ATTR_GID) {
owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
if (owner_grouplen < 0) {
- printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
- iap->ia_gid);
+ dprintk("nfs: couldn't resolve gid %d to string\n",
+ iap->ia_gid);
strcpy(owner_group, "nobody");
owner_grouplen = sizeof("nobody") - 1;
/* goto out; */
@@ -711,7 +712,7 @@
* Now we backfill the bitmap and the attribute buffer length.
*/
if (len != ((char *)p - (char *)q) + 4) {
- printk ("encode_attr: Attr length calculation error! %u != %Zu\n",
+ printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n",
len, ((char *)p - (char *)q) + 4);
BUG();
}
@@ -1376,14 +1377,20 @@
{
struct xdr_stream xdr;
struct compound_hdr hdr = {
- .nops = 2,
+ .nops = 3,
};
int status;
xdr_init_encode(&xdr, &req->rq_snd_buf, p);
encode_compound_hdr(&xdr, &hdr);
- if ((status = encode_putfh(&xdr, args->fh)) == 0)
- status = encode_access(&xdr, args->access);
+ status = encode_putfh(&xdr, args->fh);
+ if (status != 0)
+ goto out;
+ status = encode_access(&xdr, args->access);
+ if (status != 0)
+ goto out;
+ status = encode_getfattr(&xdr, args->bitmask);
+out:
return status;
}
@@ -1857,6 +1864,7 @@
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
xdr_inline_pages(&req->rq_rcv_buf, replen,
args->pages, args->pgbase, args->count);
+ req->rq_rcv_buf.flags |= XDRBUF_READ;
out:
return status;
}
@@ -1933,6 +1941,7 @@
status = encode_write(&xdr, args);
if (status)
goto out;
+ req->rq_snd_buf.flags |= XDRBUF_WRITE;
status = encode_getfattr(&xdr, args->bitmask);
out:
return status;
@@ -2180,9 +2189,9 @@
#define READ_BUF(nbytes) do { \
p = xdr_inline_decode(xdr, nbytes); \
if (unlikely(!p)) { \
- printk(KERN_INFO "%s: prematurely hit end of receive" \
+ dprintk("nfs: %s: prematurely hit end of receive" \
" buffer\n", __FUNCTION__); \
- printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
+ dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
__FUNCTION__, xdr->p, nbytes, xdr->end); \
return -EIO; \
} \
@@ -2223,9 +2232,8 @@
READ_BUF(8);
READ32(opnum);
if (opnum != expected) {
- printk(KERN_NOTICE
- "nfs4_decode_op_hdr: Server returned operation"
- " %d but we issued a request for %d\n",
+ dprintk("nfs: Server returned operation"
+ " %d but we issued a request for %d\n",
opnum, expected);
return -EIO;
}
@@ -2758,7 +2766,7 @@
dprintk("%s: nfs_map_name_to_uid failed!\n",
__FUNCTION__);
} else
- printk(KERN_WARNING "%s: name too long (%u)!\n",
+ dprintk("%s: name too long (%u)!\n",
__FUNCTION__, len);
bitmap[1] &= ~FATTR4_WORD1_OWNER;
}
@@ -2783,7 +2791,7 @@
dprintk("%s: nfs_map_group_to_gid failed!\n",
__FUNCTION__);
} else
- printk(KERN_WARNING "%s: name too long (%u)!\n",
+ dprintk("%s: name too long (%u)!\n",
__FUNCTION__, len);
bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
}
@@ -2950,7 +2958,8 @@
unsigned int nwords = xdr->p - savep;
if (unlikely(attrwords != nwords)) {
- printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n",
+ dprintk("%s: server returned incorrect attribute length: "
+ "%u %c %u\n",
__FUNCTION__,
attrwords << 2,
(attrwords < nwords) ? '<' : '>',
@@ -3451,7 +3460,7 @@
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
- printk(KERN_WARNING "NFS: server cheating in read reply: "
+ dprintk("NFS: server cheating in read reply: "
"count %u > recvd %u\n", count, recvd);
count = recvd;
eof = 0;
@@ -3500,7 +3509,8 @@
p += 2; /* cookie */
len = ntohl(*p++); /* filename length */
if (len > NFS4_MAXNAMLEN) {
- printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+ dprintk("NFS: giant filename in readdir (len 0x%x)\n",
+ len);
goto err_unmap;
}
xlen = XDR_QUADLEN(len);
@@ -3528,7 +3538,7 @@
entry[0] = entry[1] = 0;
/* truncate listing ? */
if (!nr) {
- printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+ dprintk("NFS: readdir reply truncated!\n");
entry[1] = 1;
}
goto out;
@@ -3554,13 +3564,13 @@
READ_BUF(4);
READ32(len);
if (len >= rcvbuf->page_len || len <= 0) {
- dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+ dprintk("nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
}
hdrlen = (char *) xdr->p - (char *) iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (recvd < len) {
- printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+ dprintk("NFS: server cheating in readlink reply: "
"count %u > recvd %u\n", len, recvd);
return -EIO;
}
@@ -3643,7 +3653,7 @@
hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (attrlen > recvd) {
- printk(KERN_WARNING "NFS: server cheating in getattr"
+ dprintk("NFS: server cheating in getattr"
" acl reply: attrlen %u > recvd %u\n",
attrlen, recvd);
return -EINVAL;
@@ -3688,8 +3698,7 @@
READ_BUF(8);
READ32(opnum);
if (opnum != OP_SETCLIENTID) {
- printk(KERN_NOTICE
- "nfs4_decode_setclientid: Server returned operation"
+ dprintk("nfs: decode_setclientid: Server returned operation"
" %d\n", opnum);
return -EIO;
}
@@ -3783,8 +3792,13 @@
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
goto out;
- if ((status = decode_putfh(&xdr)) == 0)
- status = decode_access(&xdr, res);
+ status = decode_putfh(&xdr);
+ if (status != 0)
+ goto out;
+ status = decode_access(&xdr, res);
+ if (status != 0)
+ goto out;
+ decode_getfattr(&xdr, res->fattr, res->server);
out:
return status;
}
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 3490322..e87b44e 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -76,6 +76,7 @@
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprtsock.h>
#include <linux/nfs.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
@@ -491,7 +492,7 @@
struct sockaddr_in sin;
int status;
int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
- IPPROTO_TCP : IPPROTO_UDP;
+ XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP;
int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
NFS_MNT3_VERSION : NFS_MNT_VERSION;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 845cdde..97669ed 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -476,6 +476,8 @@
dprintk("NFS call readdir %d\n", (unsigned int)cookie);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nfs_invalidate_atime(dir);
+
dprintk("NFS reply readdir: %d\n", status);
return status;
}
@@ -550,6 +552,7 @@
static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
+ nfs_invalidate_atime(data->inode);
if (task->tk_status >= 0) {
nfs_refresh_inode(data->inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -576,7 +579,7 @@
static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
if (task->tk_status >= 0)
- nfs_post_op_update_inode(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
return 0;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 19e0563..4587a86 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -341,9 +341,6 @@
set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
nfs_mark_for_revalidate(data->inode);
}
- spin_lock(&data->inode->i_lock);
- NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
- spin_unlock(&data->inode->i_lock);
return 0;
}
@@ -497,8 +494,7 @@
if (ctx == NULL)
goto out_unlock;
} else
- ctx = get_nfs_open_context((struct nfs_open_context *)
- file->private_data);
+ ctx = get_nfs_open_context(nfs_file_open_context(file));
error = nfs_readpage_async(ctx, inode, page);
@@ -576,8 +572,7 @@
if (desc.ctx == NULL)
return -EBADF;
} else
- desc.ctx = get_nfs_open_context((struct nfs_open_context *)
- filp->private_data);
+ desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
if (rsize < PAGE_CACHE_SIZE)
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b878528..fa517ae 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -33,6 +33,8 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/metrics.h>
+#include <linux/sunrpc/xprtsock.h>
+#include <linux/sunrpc/xprtrdma.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
@@ -58,36 +60,6 @@
#define NFSDBG_FACILITY NFSDBG_VFS
-
-struct nfs_parsed_mount_data {
- int flags;
- int rsize, wsize;
- int timeo, retrans;
- int acregmin, acregmax,
- acdirmin, acdirmax;
- int namlen;
- unsigned int bsize;
- unsigned int auth_flavor_len;
- rpc_authflavor_t auth_flavors[1];
- char *client_address;
-
- struct {
- struct sockaddr_in address;
- unsigned int program;
- unsigned int version;
- unsigned short port;
- int protocol;
- } mount_server;
-
- struct {
- struct sockaddr_in address;
- char *hostname;
- char *export_path;
- unsigned int program;
- int protocol;
- } nfs_server;
-};
-
enum {
/* Mount options that take no arguments */
Opt_soft, Opt_hard,
@@ -97,7 +69,7 @@
Opt_ac, Opt_noac,
Opt_lock, Opt_nolock,
Opt_v2, Opt_v3,
- Opt_udp, Opt_tcp,
+ Opt_udp, Opt_tcp, Opt_rdma,
Opt_acl, Opt_noacl,
Opt_rdirplus, Opt_nordirplus,
Opt_sharecache, Opt_nosharecache,
@@ -116,7 +88,7 @@
/* Mount options that take string arguments */
Opt_sec, Opt_proto, Opt_mountproto,
- Opt_addr, Opt_mounthost, Opt_clientaddr,
+ Opt_addr, Opt_mountaddr, Opt_clientaddr,
/* Mount options that are ignored */
Opt_userspace, Opt_deprecated,
@@ -143,6 +115,7 @@
{ Opt_v3, "v3" },
{ Opt_udp, "udp" },
{ Opt_tcp, "tcp" },
+ { Opt_rdma, "rdma" },
{ Opt_acl, "acl" },
{ Opt_noacl, "noacl" },
{ Opt_rdirplus, "rdirplus" },
@@ -175,13 +148,14 @@
{ Opt_mountproto, "mountproto=%s" },
{ Opt_addr, "addr=%s" },
{ Opt_clientaddr, "clientaddr=%s" },
- { Opt_mounthost, "mounthost=%s" },
+ { Opt_userspace, "mounthost=%s" },
+ { Opt_mountaddr, "mountaddr=%s" },
{ Opt_err, NULL }
};
enum {
- Opt_xprt_udp, Opt_xprt_tcp,
+ Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma,
Opt_xprt_err
};
@@ -189,6 +163,7 @@
static match_table_t nfs_xprt_protocol_tokens = {
{ Opt_xprt_udp, "udp" },
{ Opt_xprt_tcp, "tcp" },
+ { Opt_xprt_rdma, "rdma" },
{ Opt_xprt_err, NULL }
};
@@ -449,7 +424,7 @@
const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
- { NFS_MOUNT_INTR, ",intr", "" },
+ { NFS_MOUNT_INTR, ",intr", ",nointr" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
{ NFS_MOUNT_NONLM, ",nolock", "" },
@@ -460,8 +435,6 @@
};
const struct proc_nfs_info *nfs_infop;
struct nfs_client *clp = nfss->nfs_client;
- char buf[12];
- const char *proto;
seq_printf(m, ",vers=%d", clp->rpc_ops->version);
seq_printf(m, ",rsize=%d", nfss->rsize);
@@ -480,18 +453,8 @@
else
seq_puts(m, nfs_infop->nostr);
}
- switch (nfss->client->cl_xprt->prot) {
- case IPPROTO_TCP:
- proto = "tcp";
- break;
- case IPPROTO_UDP:
- proto = "udp";
- break;
- default:
- snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
- proto = buf;
- }
- seq_printf(m, ",proto=%s", proto);
+ seq_printf(m, ",proto=%s",
+ rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
seq_printf(m, ",retrans=%u", clp->retrans_count);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
@@ -506,8 +469,8 @@
nfs_show_mount_options(m, nfss, 0);
- seq_puts(m, ",addr=");
- seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
+ seq_printf(m, ",addr="NIPQUAD_FMT,
+ NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
return 0;
}
@@ -698,13 +661,19 @@
break;
case Opt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = IPPROTO_UDP;
+ mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
mnt->timeo = 7;
mnt->retrans = 5;
break;
case Opt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = IPPROTO_TCP;
+ mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ mnt->timeo = 600;
+ mnt->retrans = 2;
+ break;
+ case Opt_rdma:
+ mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
+ mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
mnt->timeo = 600;
mnt->retrans = 2;
break;
@@ -913,13 +882,20 @@
switch (token) {
case Opt_xprt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = IPPROTO_UDP;
+ mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
mnt->timeo = 7;
mnt->retrans = 5;
break;
case Opt_xprt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = IPPROTO_TCP;
+ mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ mnt->timeo = 600;
+ mnt->retrans = 2;
+ break;
+ case Opt_xprt_rdma:
+ /* vector side protocols to TCP */
+ mnt->flags |= NFS_MOUNT_TCP;
+ mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
mnt->timeo = 600;
mnt->retrans = 2;
break;
@@ -937,11 +913,12 @@
switch (token) {
case Opt_xprt_udp:
- mnt->mount_server.protocol = IPPROTO_UDP;
+ mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp:
- mnt->mount_server.protocol = IPPROTO_TCP;
+ mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
break;
+ case Opt_xprt_rdma: /* not used for side protocols */
default:
goto out_unrec_xprt;
}
@@ -961,7 +938,7 @@
goto out_nomem;
mnt->client_address = string;
break;
- case Opt_mounthost:
+ case Opt_mountaddr:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
@@ -1027,16 +1004,10 @@
sin = args->mount_server.address;
else
sin = args->nfs_server.address;
- if (args->mount_server.port == 0) {
- status = rpcb_getport_sync(&sin,
- args->mount_server.program,
- args->mount_server.version,
- args->mount_server.protocol);
- if (status < 0)
- goto out_err;
- sin.sin_port = htons(status);
- } else
- sin.sin_port = htons(args->mount_server.port);
+ /*
+ * autobind will be used if mount_server.port == 0
+ */
+ sin.sin_port = htons(args->mount_server.port);
/*
* Now ask the mount server to map our export path
@@ -1049,14 +1020,11 @@
args->mount_server.version,
args->mount_server.protocol,
root_fh);
- if (status < 0)
- goto out_err;
+ if (status == 0)
+ return 0;
- return status;
-
-out_err:
- dfprintk(MOUNT, "NFS: unable to contact server on host "
- NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
+ dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
+ ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
return status;
}
@@ -1079,15 +1047,31 @@
* XXX: as far as I can tell, changing the NFS program number is not
* supported in the NFS client.
*/
-static int nfs_validate_mount_data(struct nfs_mount_data **options,
+static int nfs_validate_mount_data(void *options,
+ struct nfs_parsed_mount_data *args,
struct nfs_fh *mntfh,
const char *dev_name)
{
- struct nfs_mount_data *data = *options;
+ struct nfs_mount_data *data = (struct nfs_mount_data *)options;
if (data == NULL)
goto out_no_data;
+ memset(args, 0, sizeof(*args));
+ args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
+ args->rsize = NFS_MAX_FILE_IO_SIZE;
+ args->wsize = NFS_MAX_FILE_IO_SIZE;
+ args->timeo = 600;
+ args->retrans = 2;
+ args->acregmin = 3;
+ args->acregmax = 60;
+ args->acdirmin = 30;
+ args->acdirmax = 60;
+ args->mount_server.protocol = XPRT_TRANSPORT_UDP;
+ args->mount_server.program = NFS_MNT_PROGRAM;
+ args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ args->nfs_server.program = NFS_PROGRAM;
+
switch (data->version) {
case 1:
data->namlen = 0;
@@ -1116,92 +1100,73 @@
if (mntfh->size < sizeof(mntfh->data))
memset(mntfh->data + mntfh->size, 0,
sizeof(mntfh->data) - mntfh->size);
+
+ if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
+ goto out_no_address;
+
+ /*
+ * Translate to nfs_parsed_mount_data, which nfs_fill_super
+ * can deal with.
+ */
+ args->flags = data->flags;
+ args->rsize = data->rsize;
+ args->wsize = data->wsize;
+ args->flags = data->flags;
+ args->timeo = data->timeo;
+ args->retrans = data->retrans;
+ args->acregmin = data->acregmin;
+ args->acregmax = data->acregmax;
+ args->acdirmin = data->acdirmin;
+ args->acdirmax = data->acdirmax;
+ args->nfs_server.address = data->addr;
+ if (!(data->flags & NFS_MOUNT_TCP))
+ args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
+ args->namlen = data->namlen;
+ args->bsize = data->bsize;
+ args->auth_flavors[0] = data->pseudoflavor;
break;
default: {
unsigned int len;
char *c;
int status;
- struct nfs_parsed_mount_data args = {
- .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
- .rsize = NFS_MAX_FILE_IO_SIZE,
- .wsize = NFS_MAX_FILE_IO_SIZE,
- .timeo = 600,
- .retrans = 2,
- .acregmin = 3,
- .acregmax = 60,
- .acdirmin = 30,
- .acdirmax = 60,
- .mount_server.protocol = IPPROTO_UDP,
- .mount_server.program = NFS_MNT_PROGRAM,
- .nfs_server.protocol = IPPROTO_TCP,
- .nfs_server.program = NFS_PROGRAM,
- };
- if (nfs_parse_mount_options((char *) *options, &args) == 0)
+ if (nfs_parse_mount_options((char *)options, args) == 0)
return -EINVAL;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (data == NULL)
- return -ENOMEM;
-
- /*
- * NB: after this point, caller will free "data"
- * if we return an error
- */
- *options = data;
+ if (!nfs_verify_server_address((struct sockaddr *)
+ &args->nfs_server.address))
+ goto out_no_address;
c = strchr(dev_name, ':');
if (c == NULL)
return -EINVAL;
len = c - dev_name;
- if (len > sizeof(data->hostname))
- return -ENAMETOOLONG;
- strncpy(data->hostname, dev_name, len);
- args.nfs_server.hostname = data->hostname;
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
c++;
if (strlen(c) > NFS_MAXPATHLEN)
return -ENAMETOOLONG;
- args.nfs_server.export_path = c;
+ args->nfs_server.export_path = c;
- status = nfs_try_mount(&args, mntfh);
+ status = nfs_try_mount(args, mntfh);
if (status)
return status;
- /*
- * Translate to nfs_mount_data, which nfs_fill_super
- * can deal with.
- */
- data->version = 6;
- data->flags = args.flags;
- data->rsize = args.rsize;
- data->wsize = args.wsize;
- data->timeo = args.timeo;
- data->retrans = args.retrans;
- data->acregmin = args.acregmin;
- data->acregmax = args.acregmax;
- data->acdirmin = args.acdirmin;
- data->acdirmax = args.acdirmax;
- data->addr = args.nfs_server.address;
- data->namlen = args.namlen;
- data->bsize = args.bsize;
- data->pseudoflavor = args.auth_flavors[0];
-
break;
}
}
- if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
- data->pseudoflavor = RPC_AUTH_UNIX;
+ if (!(args->flags & NFS_MOUNT_SECFLAVOUR))
+ args->auth_flavors[0] = RPC_AUTH_UNIX;
#ifndef CONFIG_NFS_V3
- if (data->flags & NFS_MOUNT_VER3)
+ if (args->flags & NFS_MOUNT_VER3)
goto out_v3_not_compiled;
#endif /* !CONFIG_NFS_V3 */
- if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
- goto out_no_address;
-
return 0;
out_no_data:
@@ -1258,7 +1223,8 @@
/*
* Finish setting up an NFS2/3 superblock
*/
-static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
+static void nfs_fill_super(struct super_block *sb,
+ struct nfs_parsed_mount_data *data)
{
struct nfs_server *server = NFS_SB(sb);
@@ -1379,7 +1345,7 @@
struct nfs_server *server = NULL;
struct super_block *s;
struct nfs_fh mntfh;
- struct nfs_mount_data *data = raw_data;
+ struct nfs_parsed_mount_data data;
struct dentry *mntroot;
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
struct nfs_sb_mountdata sb_mntdata = {
@@ -1388,12 +1354,12 @@
int error;
/* Validate the mount data */
- error = nfs_validate_mount_data(&data, &mntfh, dev_name);
+ error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
if (error < 0)
goto out;
/* Get a volume representation */
- server = nfs_create_server(data, &mntfh);
+ server = nfs_create_server(&data, &mntfh);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out;
@@ -1417,7 +1383,7 @@
if (!s->s_root) {
/* initial superblock/root creation */
- nfs_fill_super(s, data);
+ nfs_fill_super(s, &data);
}
mntroot = nfs_get_root(s, &mntfh);
@@ -1432,8 +1398,7 @@
error = 0;
out:
- if (data != raw_data)
- kfree(data);
+ kfree(data.nfs_server.hostname);
return error;
out_err_nosb:
@@ -1559,38 +1524,49 @@
/*
* Validate NFSv4 mount options
*/
-static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
- const char *dev_name,
- struct sockaddr_in *addr,
- rpc_authflavor_t *authflavour,
- char **hostname,
- char **mntpath,
- char **ip_addr)
+static int nfs4_validate_mount_data(void *options,
+ struct nfs_parsed_mount_data *args,
+ const char *dev_name)
{
- struct nfs4_mount_data *data = *options;
+ struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
char *c;
if (data == NULL)
goto out_no_data;
+ memset(args, 0, sizeof(*args));
+ args->rsize = NFS_MAX_FILE_IO_SIZE;
+ args->wsize = NFS_MAX_FILE_IO_SIZE;
+ args->timeo = 600;
+ args->retrans = 2;
+ args->acregmin = 3;
+ args->acregmax = 60;
+ args->acdirmin = 30;
+ args->acdirmax = 60;
+ args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+
switch (data->version) {
case 1:
- if (data->host_addrlen != sizeof(*addr))
+ if (data->host_addrlen != sizeof(args->nfs_server.address))
goto out_no_address;
- if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
+ if (copy_from_user(&args->nfs_server.address,
+ data->host_addr,
+ sizeof(args->nfs_server.address)))
return -EFAULT;
- if (addr->sin_port == 0)
- addr->sin_port = htons(NFS_PORT);
- if (!nfs_verify_server_address((struct sockaddr *) addr))
+ if (args->nfs_server.address.sin_port == 0)
+ args->nfs_server.address.sin_port = htons(NFS_PORT);
+ if (!nfs_verify_server_address((struct sockaddr *)
+ &args->nfs_server.address))
goto out_no_address;
switch (data->auth_flavourlen) {
case 0:
- *authflavour = RPC_AUTH_UNIX;
+ args->auth_flavors[0] = RPC_AUTH_UNIX;
break;
case 1:
- if (copy_from_user(authflavour, data->auth_flavours,
- sizeof(*authflavour)))
+ if (copy_from_user(&args->auth_flavors[0],
+ data->auth_flavours,
+ sizeof(args->auth_flavors[0])))
return -EFAULT;
break;
default:
@@ -1600,75 +1576,57 @@
c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
if (IS_ERR(c))
return PTR_ERR(c);
- *hostname = c;
+ args->nfs_server.hostname = c;
c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
if (IS_ERR(c))
return PTR_ERR(c);
- *mntpath = c;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
+ args->nfs_server.export_path = c;
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
c = strndup_user(data->client_addr.data, 16);
if (IS_ERR(c))
return PTR_ERR(c);
- *ip_addr = c;
+ args->client_address = c;
+
+ /*
+ * Translate to nfs_parsed_mount_data, which nfs4_fill_super
+ * can deal with.
+ */
+
+ args->flags = data->flags & NFS4_MOUNT_FLAGMASK;
+ args->rsize = data->rsize;
+ args->wsize = data->wsize;
+ args->timeo = data->timeo;
+ args->retrans = data->retrans;
+ args->acregmin = data->acregmin;
+ args->acregmax = data->acregmax;
+ args->acdirmin = data->acdirmin;
+ args->acdirmax = data->acdirmax;
+ args->nfs_server.protocol = data->proto;
break;
default: {
unsigned int len;
- struct nfs_parsed_mount_data args = {
- .rsize = NFS_MAX_FILE_IO_SIZE,
- .wsize = NFS_MAX_FILE_IO_SIZE,
- .timeo = 600,
- .retrans = 2,
- .acregmin = 3,
- .acregmax = 60,
- .acdirmin = 30,
- .acdirmax = 60,
- .nfs_server.protocol = IPPROTO_TCP,
- };
- if (nfs_parse_mount_options((char *) *options, &args) == 0)
+ if (nfs_parse_mount_options((char *)options, args) == 0)
return -EINVAL;
if (!nfs_verify_server_address((struct sockaddr *)
- &args.nfs_server.address))
+ &args->nfs_server.address))
return -EINVAL;
- *addr = args.nfs_server.address;
- switch (args.auth_flavor_len) {
+ switch (args->auth_flavor_len) {
case 0:
- *authflavour = RPC_AUTH_UNIX;
+ args->auth_flavors[0] = RPC_AUTH_UNIX;
break;
case 1:
- *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
break;
default:
goto out_inval_auth;
}
/*
- * Translate to nfs4_mount_data, which nfs4_fill_super
- * can deal with.
- */
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (data == NULL)
- return -ENOMEM;
- *options = data;
-
- data->version = 1;
- data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
- data->rsize = args.rsize;
- data->wsize = args.wsize;
- data->timeo = args.timeo;
- data->retrans = args.retrans;
- data->acregmin = args.acregmin;
- data->acregmax = args.acregmax;
- data->acdirmin = args.acdirmin;
- data->acdirmax = args.acdirmax;
- data->proto = args.nfs_server.protocol;
-
- /*
* Split "dev_name" into "hostname:mntpath".
*/
c = strchr(dev_name, ':');
@@ -1678,27 +1636,25 @@
len = c - dev_name;
if (len > NFS4_MAXNAMLEN)
return -ENAMETOOLONG;
- *hostname = kzalloc(len, GFP_KERNEL);
- if (*hostname == NULL)
+ args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
+ if (args->nfs_server.hostname == NULL)
return -ENOMEM;
- strncpy(*hostname, dev_name, len - 1);
+ strncpy(args->nfs_server.hostname, dev_name, len - 1);
c++; /* step over the ':' */
len = strlen(c);
if (len > NFS4_MAXPATHLEN)
return -ENAMETOOLONG;
- *mntpath = kzalloc(len + 1, GFP_KERNEL);
- if (*mntpath == NULL)
+ args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
+ if (args->nfs_server.export_path == NULL)
return -ENOMEM;
- strncpy(*mntpath, c, len);
+ strncpy(args->nfs_server.export_path, c, len);
- dprintk("MNTPATH: %s\n", *mntpath);
+ dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
- if (args.client_address == NULL)
+ if (args->client_address == NULL)
goto out_no_client_address;
- *ip_addr = args.client_address;
-
break;
}
}
@@ -1729,14 +1685,11 @@
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
- struct nfs4_mount_data *data = raw_data;
+ struct nfs_parsed_mount_data data;
struct super_block *s;
struct nfs_server *server;
- struct sockaddr_in addr;
- rpc_authflavor_t authflavour;
struct nfs_fh mntfh;
struct dentry *mntroot;
- char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
struct nfs_sb_mountdata sb_mntdata = {
.mntflags = flags,
@@ -1744,14 +1697,12 @@
int error;
/* Validate the mount data */
- error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
- &hostname, &mntpath, &ip_addr);
+ error = nfs4_validate_mount_data(raw_data, &data, dev_name);
if (error < 0)
goto out;
/* Get a volume representation */
- server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
- authflavour, &mntfh);
+ server = nfs4_create_server(&data, &mntfh);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out;
@@ -1790,9 +1741,9 @@
error = 0;
out:
- kfree(ip_addr);
- kfree(mntpath);
- kfree(hostname);
+ kfree(data.client_address);
+ kfree(data.nfs_server.export_path);
+ kfree(data.nfs_server.hostname);
return error;
out_free:
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 045ab80..1aed850 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -66,7 +66,6 @@
.rpc_cred = data->cred,
};
- nfs_begin_data_update(dir);
NFS_PROTO(dir)->unlink_setup(&msg, dir);
rpc_call_setup(task, &msg, 0);
}
@@ -84,8 +83,6 @@
if (!NFS_PROTO(dir)->unlink_done(task, dir))
rpc_restart_call(task);
- else
- nfs_end_data_update(dir);
}
/**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0d7a77c..e2bb66c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -110,6 +110,13 @@
nfs_writedata_free(wdata);
}
+static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
+{
+ ctx->error = error;
+ smp_wmb();
+ set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+}
+
static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
struct nfs_page *req = NULL;
@@ -243,10 +250,7 @@
/*
* Find an associated nfs write request, and prepare to flush it out
- * Returns 1 if there was no write request, or if the request was
- * already tagged by nfs_set_page_dirty.Returns 0 if the request
- * was not tagged.
- * May also return an error if the user signalled nfs_wait_on_request().
+ * May return an error if the user signalled nfs_wait_on_request().
*/
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct page *page)
@@ -261,7 +265,7 @@
req = nfs_page_find_request_locked(page);
if (req == NULL) {
spin_unlock(&inode->i_lock);
- return 1;
+ return 0;
}
if (nfs_lock_request_dontget(req))
break;
@@ -282,7 +286,7 @@
spin_unlock(&inode->i_lock);
nfs_unlock_request(req);
nfs_pageio_complete(pgio);
- return 1;
+ return 0;
}
if (nfs_set_page_writeback(page) != 0) {
spin_unlock(&inode->i_lock);
@@ -290,10 +294,20 @@
}
radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
NFS_PAGE_TAG_LOCKED);
- ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
spin_unlock(&inode->i_lock);
nfs_pageio_add_request(pgio, req);
- return ret;
+ return 0;
+}
+
+static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
+{
+ struct inode *inode = page->mapping->host;
+
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
+ nfs_pageio_cond_complete(pgio, page->index);
+ return nfs_page_async_flush(pgio, page);
}
/*
@@ -301,59 +315,35 @@
*/
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
- struct nfs_pageio_descriptor mypgio, *pgio;
- struct nfs_open_context *ctx;
- struct inode *inode = page->mapping->host;
- unsigned offset;
+ struct nfs_pageio_descriptor pgio;
int err;
- nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
- nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
-
- if (wbc->for_writepages)
- pgio = wbc->fs_private;
- else {
- nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
- pgio = &mypgio;
- }
-
- nfs_pageio_cond_complete(pgio, page->index);
-
- err = nfs_page_async_flush(pgio, page);
- if (err <= 0)
- goto out;
- err = 0;
- offset = nfs_page_length(page);
- if (!offset)
- goto out;
-
- nfs_pageio_cond_complete(pgio, page->index);
-
- ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
- if (ctx == NULL) {
- err = -EBADF;
- goto out;
- }
- err = nfs_writepage_setup(ctx, page, 0, offset);
- put_nfs_open_context(ctx);
- if (err != 0)
- goto out;
- err = nfs_page_async_flush(pgio, page);
- if (err > 0)
- err = 0;
-out:
- if (!wbc->for_writepages)
- nfs_pageio_complete(pgio);
- return err;
+ nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+ err = nfs_do_writepage(page, wbc, &pgio);
+ nfs_pageio_complete(&pgio);
+ if (err < 0)
+ return err;
+ if (pgio.pg_error < 0)
+ return pgio.pg_error;
+ return 0;
}
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
- int err;
+ int ret;
- err = nfs_writepage_locked(page, wbc);
+ ret = nfs_writepage_locked(page, wbc);
unlock_page(page);
- return err;
+ return ret;
+}
+
+static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
+{
+ int ret;
+
+ ret = nfs_do_writepage(page, wbc, data);
+ unlock_page(page);
+ return ret;
}
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
@@ -365,12 +355,11 @@
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
- wbc->fs_private = &pgio;
- err = generic_writepages(mapping, wbc);
+ err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
- if (err)
+ if (err < 0)
return err;
- if (pgio.pg_error)
+ if (pgio.pg_error < 0)
return pgio.pg_error;
return 0;
}
@@ -389,14 +378,11 @@
return error;
if (!nfsi->npages) {
igrab(inode);
- nfs_begin_data_update(inode);
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
SetPagePrivate(req->wb_page);
set_page_private(req->wb_page, (unsigned long)req);
- if (PageDirty(req->wb_page))
- set_bit(PG_NEED_FLUSH, &req->wb_flags);
nfsi->npages++;
kref_get(&req->wb_kref);
return 0;
@@ -416,12 +402,9 @@
set_page_private(req->wb_page, 0);
ClearPagePrivate(req->wb_page);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
- if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags))
- __set_page_dirty_nobuffers(req->wb_page);
nfsi->npages--;
if (!nfsi->npages) {
spin_unlock(&inode->i_lock);
- nfs_end_data_update(inode);
iput(inode);
} else
spin_unlock(&inode->i_lock);
@@ -682,7 +665,7 @@
int nfs_flush_incompatible(struct file *file, struct page *page)
{
- struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_page *req;
int do_flush, status;
/*
@@ -716,7 +699,7 @@
int nfs_updatepage(struct file *file, struct page *page,
unsigned int offset, unsigned int count)
{
- struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = page->mapping->host;
int status = 0;
@@ -967,7 +950,7 @@
if (task->tk_status < 0) {
nfs_set_pageerror(page);
- req->wb_context->error = task->tk_status;
+ nfs_context_set_write_error(req->wb_context, task->tk_status);
dprintk(", error = %d\n", task->tk_status);
goto out;
}
@@ -1030,7 +1013,7 @@
if (task->tk_status < 0) {
nfs_set_pageerror(page);
- req->wb_context->error = task->tk_status;
+ nfs_context_set_write_error(req->wb_context, task->tk_status);
dprintk(", error = %d\n", task->tk_status);
goto remove_request;
}
@@ -1244,7 +1227,7 @@
req->wb_bytes,
(long long)req_offset(req));
if (task->tk_status < 0) {
- req->wb_context->error = task->tk_status;
+ nfs_context_set_write_error(req->wb_context, task->tk_status);
nfs_inode_remove_request(req);
dprintk(", error = %d\n", task->tk_status);
goto next;
@@ -1347,53 +1330,52 @@
return ret;
}
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
+static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
{
- struct address_space *mapping = inode->i_mapping;
+ int ret;
+
+ ret = nfs_writepages(mapping, wbc);
+ if (ret < 0)
+ goto out;
+ ret = nfs_sync_mapping_wait(mapping, wbc, how);
+ if (ret < 0)
+ goto out;
+ return 0;
+out:
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+ return ret;
+}
+
+/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
+static int nfs_write_mapping(struct address_space *mapping, int how)
+{
struct writeback_control wbc = {
.bdi = mapping->backing_dev_info,
- .sync_mode = WB_SYNC_ALL,
+ .sync_mode = WB_SYNC_NONE,
.nr_to_write = LONG_MAX,
.for_writepages = 1,
.range_cyclic = 1,
};
int ret;
- ret = nfs_writepages(mapping, &wbc);
+ ret = __nfs_write_mapping(mapping, &wbc, how);
if (ret < 0)
- goto out;
- ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
- if (ret >= 0)
- return 0;
-out:
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return ret;
+ return ret;
+ wbc.sync_mode = WB_SYNC_ALL;
+ return __nfs_write_mapping(mapping, &wbc, how);
}
-int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
{
- struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_start = range_start,
- .range_end = range_end,
- .for_writepages = 1,
- };
- int ret;
+ return nfs_write_mapping(inode->i_mapping, 0);
+}
- ret = nfs_writepages(mapping, &wbc);
- if (ret < 0)
- goto out;
- ret = nfs_sync_mapping_wait(mapping, &wbc, how);
- if (ret >= 0)
- return 0;
-out:
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return ret;
+int nfs_wb_nocommit(struct inode *inode)
+{
+ return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
}
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
@@ -1477,35 +1459,6 @@
return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
}
-int nfs_set_page_dirty(struct page *page)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode;
- struct nfs_page *req;
- int ret;
-
- if (!mapping)
- goto out_raced;
- inode = mapping->host;
- if (!inode)
- goto out_raced;
- spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(page);
- if (req != NULL) {
- /* Mark any existing write requests for flushing */
- ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
- spin_unlock(&inode->i_lock);
- nfs_release_request(req);
- return ret;
- }
- ret = __set_page_dirty_nobuffers(page);
- spin_unlock(&inode->i_lock);
- return ret;
-out_raced:
- return !TestSetPageDirty(page);
-}
-
-
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e15f2cf..5733394 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -102,7 +102,8 @@
out: \
return status; \
xdr_error: \
- printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ dprintk("NFSD: xdr error (%s:%d)\n", \
+ __FILE__, __LINE__); \
status = nfserr_bad_xdr; \
goto out
@@ -124,7 +125,8 @@
if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
savemem(argp, p, nbytes) : \
(char *)p)) { \
- printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ dprintk("NFSD: xdr error (%s:%d)\n", \
+ __FILE__, __LINE__); \
goto xdr_error; \
} \
p += XDR_QUADLEN(nbytes); \
@@ -140,7 +142,8 @@
p = argp->p; \
argp->p += XDR_QUADLEN(nbytes); \
} else if (!(p = read_buf(argp, nbytes))) { \
- printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ dprintk("NFSD: xdr error (%s:%d)\n", \
+ __FILE__, __LINE__); \
goto xdr_error; \
} \
} while (0)
@@ -948,7 +951,8 @@
*/
avail = (char*)argp->end - (char*)argp->p;
if (avail + argp->pagelen < write->wr_buflen) {
- printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
+ dprintk("NFSD: xdr error (%s:%d)\n",
+ __FILE__, __LINE__);
goto xdr_error;
}
argp->rqstp->rq_vec[0].iov_base = p;
@@ -1019,7 +1023,7 @@
argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
if (!argp->ops) {
argp->ops = argp->iops;
- printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n");
+ dprintk("nfsd: couldn't allocate room for COMPOUND\n");
goto xdr_error;
}
}
@@ -1326,7 +1330,7 @@
path = exp->ex_path;
if (strncmp(path, rootpath, strlen(rootpath))) {
- printk("nfsd: fs_locations failed;"
+ dprintk("nfsd: fs_locations failed;"
"%s is not contained in %s\n", path, rootpath);
*stat = nfserr_notsupp;
return NULL;
diff --git a/include/asm-blackfin/mach-bf548/bf54x_keys.h b/include/asm-blackfin/mach-bf548/bf54x_keys.h
new file mode 100644
index 0000000..1fb4ec7
--- /dev/null
+++ b/include/asm-blackfin/mach-bf548/bf54x_keys.h
@@ -0,0 +1,17 @@
+#ifndef _BFIN_KPAD_H
+#define _BFIN_KPAD_H
+
+struct bfin_kpad_platform_data {
+ int rows;
+ int cols;
+ const unsigned int *keymap;
+ unsigned short keymapsize;
+ unsigned short repeat;
+ u32 debounce_time; /* in ns */
+ u32 coldrive_time; /* in ns */
+ u32 keyup_test_interval; /* in ms */
+};
+
+#define KEYVAL(col, row, val) (((1 << col) << 24) | ((1 << row) << 16) | (val))
+
+#endif
diff --git a/include/asm-mips/mach-au1x00/prom.h b/include/asm-mips/mach-au1x00/prom.h
new file mode 100644
index 0000000..e387155
--- /dev/null
+++ b/include/asm-mips/mach-au1x00/prom.h
@@ -0,0 +1,13 @@
+#ifndef __AU1X00_PROM_H
+#define __AU1X00_PROM_H
+
+extern int prom_argc;
+extern char **prom_argv;
+extern char **prom_envp;
+
+extern void prom_init_cmdline(void);
+extern char *prom_getcmdline(void);
+extern char *prom_getenv(char *envname);
+extern int prom_get_ethernet_addr(char *ethernet_addr);
+
+#endif
diff --git a/include/asm-powerpc/dcr-mmio.h b/include/asm-powerpc/dcr-mmio.h
index 6b82c3b..08532ff 100644
--- a/include/asm-powerpc/dcr-mmio.h
+++ b/include/asm-powerpc/dcr-mmio.h
@@ -33,16 +33,16 @@
extern dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n,
unsigned int dcr_c);
-extern void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c);
+extern void dcr_unmap(dcr_host_t host, unsigned int dcr_c);
static inline u32 dcr_read(dcr_host_t host, unsigned int dcr_n)
{
- return in_be32(host.token + dcr_n * host.stride);
+ return in_be32(host.token + ((host.base + dcr_n) * host.stride));
}
static inline void dcr_write(dcr_host_t host, unsigned int dcr_n, u32 value)
{
- out_be32(host.token + dcr_n * host.stride, value);
+ out_be32(host.token + ((host.base + dcr_n) * host.stride), value);
}
extern u64 of_translate_dcr_address(struct device_node *dev,
diff --git a/include/asm-powerpc/dcr-native.h b/include/asm-powerpc/dcr-native.h
index f41058c..8dbb1ab 100644
--- a/include/asm-powerpc/dcr-native.h
+++ b/include/asm-powerpc/dcr-native.h
@@ -29,9 +29,9 @@
#define DCR_MAP_OK(host) (1)
#define dcr_map(dev, dcr_n, dcr_c) ((dcr_host_t){ .base = (dcr_n) })
-#define dcr_unmap(host, dcr_n, dcr_c) do {} while (0)
-#define dcr_read(host, dcr_n) mfdcr(dcr_n)
-#define dcr_write(host, dcr_n, value) mtdcr(dcr_n, value)
+#define dcr_unmap(host, dcr_c) do {} while (0)
+#define dcr_read(host, dcr_n) mfdcr(dcr_n + host.base)
+#define dcr_write(host, dcr_n, value) mtdcr(dcr_n + host.base, value)
/* Device Control Registers */
void __mtdcr(int reg, unsigned int val);
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
index eff8585..d058b04 100644
--- a/include/asm-x86/irqflags_32.h
+++ b/include/asm-x86/irqflags_32.h
@@ -160,4 +160,17 @@
# define TRACE_IRQS_OFF
#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call lockdep_sys_exit; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+#else
+# define LOCKDEP_SYS_EXIT
+#endif
+
#endif
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
index 86e70fe..5341ea1 100644
--- a/include/asm-x86/irqflags_64.h
+++ b/include/asm-x86/irqflags_64.h
@@ -137,6 +137,20 @@
# define TRACE_IRQS_ON
# define TRACE_IRQS_OFF
# endif
+# ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
+# define LOCKDEP_SYS_EXIT_IRQ \
+ TRACE_IRQS_ON; \
+ sti; \
+ SAVE_REST; \
+ LOCKDEP_SYS_EXIT; \
+ RESTORE_REST; \
+ cli; \
+ TRACE_IRQS_OFF;
+# else
+# define LOCKDEP_SYS_EXIT
+# define LOCKDEP_SYS_EXIT_IRQ
+# endif
#endif
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16421f6..6d760f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1302,8 +1302,14 @@
struct module *owner;
struct file_system_type * next;
struct list_head fs_supers;
+
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
+
+ struct lock_class_key i_lock_key;
+ struct lock_class_key i_mutex_key;
+ struct lock_class_key i_mutex_dir_key;
+ struct lock_class_key i_alloc_sem_key;
};
extern int get_sb_bdev(struct file_system_type *fs_type,
diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index 265d178..c6d3a9d 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -8,6 +8,7 @@
int active_low;
char *desc;
int type; /* input event type (EV_KEY, EV_SW) */
+ int wakeup; /* configure the button as a wake-up source */
};
struct gpio_keys_platform_data {
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 99e3a1a..58e43e5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -107,7 +107,7 @@
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
struct sk_buff *skb);
-extern int (*br_should_route_hook)(struct sk_buff **pskb);
+extern int (*br_should_route_hook)(struct sk_buff *skb);
#endif
diff --git a/include/linux/input.h b/include/linux/input.h
index 52d1bd4..f30da6f 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -856,7 +856,7 @@
* defining effect parameters
*
* This structure is sent through ioctl from the application to the driver.
- * To create a new effect aplication should set its @id to -1; the kernel
+ * To create a new effect application should set its @id to -1; the kernel
* will return assigned @id which can later be used to update or delete
* this effect.
*
@@ -936,9 +936,82 @@
#define BIT(x) (1UL<<((x)%BITS_PER_LONG))
#define LONG(x) ((x)/BITS_PER_LONG)
+/**
+ * struct input_dev - represents an input device
+ * @name: name of the device
+ * @phys: physical path to the device in the system hierarchy
+ * @uniq: unique identification code for the device (if device has it)
+ * @id: id of the device (struct input_id)
+ * @evbit: bitmap of types of events supported by the device (EV_KEY,
+ * EV_REL, etc.)
+ * @keybit: bitmap of keys/buttons this device has
+ * @relbit: bitmap of relative axes for the device
+ * @absbit: bitmap of absolute axes for the device
+ * @mscbit: bitmap of miscellaneous events supported by the device
+ * @ledbit: bitmap of leds present on the device
+ * @sndbit: bitmap of sound effects supported by the device
+ * @ffbit: bitmap of force feedback effects supported by the device
+ * @swbit: bitmap of switches present on the device
+ * @keycodemax: size of keycode table
+ * @keycodesize: size of elements in keycode table
+ * @keycode: map of scancodes to keycodes for this device
+ * @setkeycode: optional method to alter current keymap, used to implement
+ * sparse keymaps. If not supplied default mechanism will be used
+ * @getkeycode: optional method to retrieve current keymap. If not supplied
+ * default mechanism will be used
+ * @ff: force feedback structure associated with the device if device
+ * supports force feedback effects
+ * @repeat_key: stores key code of the last key pressed; used to implement
+ * software autorepeat
+ * @timer: timer for software autorepeat
+ * @sync: set to 1 when there were no new events since last EV_SYNC
+ * @abs: current values for reports from absolute axes
+ * @rep: current values for autorepeat parameters (delay, rate)
+ * @key: reflects current state of device's keys/buttons
+ * @led: reflects current state of device's LEDs
+ * @snd: reflects current state of sound effects
+ * @sw: reflects current state of device's switches
+ * @absmax: maximum values for events coming from absolute axes
+ * @absmin: minimum values for events coming from absolute axes
+ * @absfuzz: describes noisiness for axes
+ * @absflat: size of the center flat position (used by joydev)
+ * @open: this method is called when the very first user calls
+ * input_open_device(). The driver must prepare the device
+ * to start generating events (start polling thread,
+ * request an IRQ, submit URB, etc.)
+ * @close: this method is called when the very last user calls
+ * input_close_device().
+ * @flush: purges the device. Most commonly used to get rid of force
+ * feedback effects loaded into the device when disconnecting
+ * from it
+ * @event: event handler for events sent _to_ the device, like EV_LED
+ * or EV_SND. The device is expected to carry out the requested
+ * action (turn on a LED, play sound, etc.) The call is protected
+ * by @event_lock and must not sleep
+ * @grab: input handle that currently has the device grabbed (via
+ * EVIOCGRAB ioctl). When a handle grabs a device it becomes sole
+ * recipient for all input events coming from the device
+ * @event_lock: this spinlock is is taken when input core receives
+ * and processes a new event for the device (in input_event()).
+ * Code that accesses and/or modifies parameters of a device
+ * (such as keymap or absmin, absmax, absfuzz, etc.) after device
+ * has been registered with input core must take this lock.
+ * @mutex: serializes calls to open(), close() and flush() methods
+ * @users: stores number of users (input handlers) that opened this
+ * device. It is used by input_open_device() and input_close_device()
+ * to make sure that dev->open() is only called when the first
+ * user opens device and dev->close() is called when the very
+ * last user closes the device
+ * @going_away: marks devices that are in a middle of unregistering and
+ * causes input_open_device*() fail with -ENODEV.
+ * @dev: driver model's view of this device
+ * @h_list: list of input handles associated with the device. When
+ * accessing the list dev->mutex must be held
+ * @node: used to place the device onto input_dev_list
+ */
struct input_dev {
- void *private;
+ void *private; /* do not use */
const char *name;
const char *phys;
@@ -966,8 +1039,6 @@
unsigned int repeat_key;
struct timer_list timer;
- int state;
-
int sync;
int abs[ABS_MAX + 1];
@@ -990,8 +1061,11 @@
struct input_handle *grab;
- struct mutex mutex; /* serializes open and close operations */
+ spinlock_t event_lock;
+ struct mutex mutex;
+
unsigned int users;
+ int going_away;
struct device dev;
union { /* temporarily so while we switching to struct device */
@@ -1057,7 +1131,9 @@
/**
* struct input_handler - implements one of interfaces for input devices
* @private: driver-specific data
- * @event: event handler
+ * @event: event handler. This method is being called by input core with
+ * interrupts disabled and dev->event_lock spinlock held and so
+ * it may not sleep
* @connect: called when attaching a handler to an input device
* @disconnect: disconnects a handler from input device
* @start: starts handler for given handle. This function is called by
@@ -1069,10 +1145,18 @@
* @name: name of the handler, to be shown in /proc/bus/input/handlers
* @id_table: pointer to a table of input_device_ids this driver can
* handle
- * @blacklist: prointer to a table of input_device_ids this driver should
+ * @blacklist: pointer to a table of input_device_ids this driver should
* ignore even if they match @id_table
* @h_list: list of input handles associated with the handler
* @node: for placing the driver onto input_handler_list
+ *
+ * Input handlers attach to input devices and create input handles. There
+ * are likely several handlers attached to any given input device at the
+ * same time. All of them will get their copy of input event generated by
+ * the device.
+ *
+ * Note that input core serializes calls to connect() and disconnect()
+ * methods.
*/
struct input_handler {
@@ -1094,6 +1178,18 @@
struct list_head node;
};
+/**
+ * struct input_handle - links input device with an input handler
+ * @private: handler-specific data
+ * @open: counter showing whether the handle is 'open', i.e. should deliver
+ * events from its device
+ * @name: name given to the handle by handler that created it
+ * @dev: input device the handle is attached to
+ * @handler: handler that works with the device through this handle
+ * @d_node: used to put the handle on device's list of attached handles
+ * @h_node: used to put the handle on handler's list of handles from which
+ * it gets events
+ */
struct input_handle {
void *private;
@@ -1136,10 +1232,10 @@
dev->private = data;
}
-int input_register_device(struct input_dev *);
+int __must_check input_register_device(struct input_dev *);
void input_unregister_device(struct input_dev *);
-int input_register_handler(struct input_handler *);
+int __must_check input_register_handler(struct input_handler *);
void input_unregister_handler(struct input_handler *);
int input_register_handle(struct input_handle *);
@@ -1216,7 +1312,7 @@
* @max_effects: maximum number of effects supported by device
* @effects: pointer to an array of effects currently loaded into device
* @effect_owners: array of effect owners; when file handle owning
- * an effect gets closed the effcet is automatically erased
+ * an effect gets closed the effect is automatically erased
*
* Every force-feedback device must implement upload() and playback()
* methods; erase() is optional. set_gain() and set_autocenter() need
diff --git a/include/linux/isdn.h b/include/linux/isdn.h
index ad09506..d5dda4b 100644
--- a/include/linux/isdn.h
+++ b/include/linux/isdn.h
@@ -286,7 +286,6 @@
/* Local interface-data */
typedef struct isdn_net_local_s {
ulong magic;
- char name[10]; /* Name of device */
struct net_device_stats stats; /* Ethernet Statistics */
int isdn_device; /* Index to isdn-device */
int isdn_channel; /* Index to isdn-channel */
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 4527375..700a93b 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -30,6 +30,7 @@
#include <linux/bit_spinlock.h>
#include <linux/mutex.h>
#include <linux/timer.h>
+#include <linux/lockdep.h>
#include <asm/semaphore.h>
#endif
@@ -396,6 +397,10 @@
unsigned int h_sync: 1; /* sync-on-close */
unsigned int h_jdata: 1; /* force data journaling */
unsigned int h_aborted: 1; /* fatal error on handle */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map h_lockdep_map;
+#endif
};
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index d7a5e03..e757a74 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -109,6 +109,10 @@
((long)(a) - (long)(b) >= 0))
#define time_before_eq(a,b) time_after_eq(b,a)
+#define time_in_range(a,b,c) \
+ (time_after_eq(a,b) && \
+ time_before_eq(a,c))
+
/* Same as above, but does so with platform independent 64bit types.
* These must be used when utilizing jiffies_64 (i.e. return value of
* get_jiffies_64() */
diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index 7ddbc30..33b5c2e 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -416,6 +416,7 @@
#define K_SHIFTRLOCK K(KT_LOCK,KG_SHIFTR)
#define K_CTRLLLOCK K(KT_LOCK,KG_CTRLL)
#define K_CTRLRLOCK K(KT_LOCK,KG_CTRLR)
+#define K_CAPSSHIFTLOCK K(KT_LOCK,KG_CAPSSHIFT)
#define K_SHIFT_SLOCK K(KT_SLOCK,KG_SHIFT)
#define K_CTRL_SLOCK K(KT_SLOCK,KG_CTRL)
@@ -425,8 +426,9 @@
#define K_SHIFTR_SLOCK K(KT_SLOCK,KG_SHIFTR)
#define K_CTRLL_SLOCK K(KT_SLOCK,KG_CTRLL)
#define K_CTRLR_SLOCK K(KT_SLOCK,KG_CTRLR)
+#define K_CAPSSHIFT_SLOCK K(KT_SLOCK,KG_CAPSSHIFT)
-#define NR_LOCK 8
+#define NR_LOCK 9
#define K_BRL_BLANK K(KT_BRL, 0)
#define K_BRL_DOT1 K(KT_BRL, 1)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0e843bf..f6279f6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -238,6 +238,7 @@
extern void lockdep_reset(void);
extern void lockdep_reset_lock(struct lockdep_map *lock);
extern void lockdep_free_key_range(void *start, unsigned long size);
+extern void lockdep_sys_exit(void);
extern void lockdep_off(void);
extern void lockdep_on(void);
@@ -252,6 +253,13 @@
struct lock_class_key *key, int subclass);
/*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+ { .name = (_name), .key = (void *)(_key), }
+
+/*
* Reinitialize a lock key - for cases where there is special locking or
* special initialization of locks so that the validator gets the scope
* of dependencies wrong: they are either too broad (they need a class-split)
@@ -317,6 +325,7 @@
# define INIT_LOCKDEP
# define lockdep_reset() do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size) do { } while (0)
+# define lockdep_sys_exit() do { } while (0)
/*
* The class key takes no space if lockdep is disabled:
*/
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 0d50ea3..6a735c7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -120,14 +120,17 @@
* See kernel/mutex.c for detailed documentation of these APIs.
* Also see Documentation/mutex-design.txt.
*/
-extern void fastcall mutex_lock(struct mutex *lock);
-extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
unsigned int subclass);
+
+#define mutex_lock(lock) mutex_lock_nested(lock, 0)
+#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
#else
+extern void fastcall mutex_lock(struct mutex *lock);
+extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
+
# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
#endif
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 1dd075e..16adac6 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -51,7 +51,7 @@
struct net_device;
typedef unsigned int nf_hookfn(unsigned int hooknum,
- struct sk_buff **skb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *));
@@ -183,7 +183,7 @@
struct nf_loginfo *li,
const char *fmt, ...);
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct sk_buff *), int thresh);
@@ -195,7 +195,7 @@
* value indicates the packet has been consumed by the hook.
*/
static inline int nf_hook_thresh(int pf, unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *), int thresh,
@@ -207,14 +207,14 @@
if (list_empty(&nf_hooks[pf][hook]))
return 1;
#endif
- return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
+ return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}
-static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
+static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
- return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1);
+ return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1);
}
/* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -241,13 +241,13 @@
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
({int __ret; \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
__ret = (okfn)(skb); \
__ret;})
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \
({int __ret; \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
__ret = (okfn)(skb); \
__ret;})
@@ -287,7 +287,7 @@
/* Call this before modifying an existing packet: ensures it is
modifiable and linear to the point you care about (writable_len).
Returns true or false. */
-extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
+extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
static inline void nf_csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
@@ -317,7 +317,7 @@
unsigned int dataoff, u_int8_t protocol);
void (*saveroute)(const struct sk_buff *skb,
struct nf_info *info);
- int (*reroute)(struct sk_buff **skb,
+ int (*reroute)(struct sk_buff *skb,
const struct nf_info *info);
int route_key_size;
};
@@ -371,15 +371,15 @@
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
static inline int nf_hook_thresh(int pf, unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *), int thresh,
int cond)
{
- return okfn(*pskb);
+ return okfn(skb);
}
-static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
+static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h
index 26c2235..0bb5a69 100644
--- a/include/linux/netfilter/nf_conntrack_amanda.h
+++ b/include/linux/netfilter/nf_conntrack_amanda.h
@@ -2,7 +2,7 @@
#define _NF_CONNTRACK_AMANDA_H
/* AMANDA tracking. */
-extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index b7c360f..47727d7 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -32,7 +32,7 @@
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
-extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
unsigned int matchoff,
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index 08e2f49..aabd24a 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -36,27 +36,27 @@
struct nf_conntrack_expect *this);
extern void nf_conntrack_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this);
-extern int (*set_h245_addr_hook) (struct sk_buff **pskb,
+extern int (*set_h245_addr_hook) (struct sk_buff *skb,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_conntrack_address *addr,
__be16 port);
-extern int (*set_h225_addr_hook) (struct sk_buff **pskb,
+extern int (*set_h225_addr_hook) (struct sk_buff *skb,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_conntrack_address *addr,
__be16 port);
-extern int (*set_sig_addr_hook) (struct sk_buff **pskb,
+extern int (*set_sig_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count);
-extern int (*set_ras_addr_hook) (struct sk_buff **pskb,
+extern int (*set_ras_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count);
-extern int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -64,24 +64,24 @@
__be16 port, __be16 rtp_port,
struct nf_conntrack_expect *rtp_exp,
struct nf_conntrack_expect *rtcp_exp);
-extern int (*nat_t120_hook) (struct sk_buff **pskb, struct nf_conn *ct,
+extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp);
-extern int (*nat_h245_hook) (struct sk_buff **pskb, struct nf_conn *ct,
+extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp);
-extern int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+extern int (*nat_callforwarding_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr,
__be16 port,
struct nf_conntrack_expect *exp);
-extern int (*nat_q931_hook) (struct sk_buff **pskb, struct nf_conn *ct,
+extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, TransportAddress *taddr,
int idx, __be16 port,
diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h
index 2ab6b82..36282bf 100644
--- a/include/linux/netfilter/nf_conntrack_irc.h
+++ b/include/linux/netfilter/nf_conntrack_irc.h
@@ -5,7 +5,7 @@
#define IRC_PORT 6667
-extern unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index c93061f..2343549 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -301,13 +301,13 @@
struct nf_conntrack_expect;
extern int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq);
extern int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq);
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index bb7f204..9fff197 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -21,11 +21,11 @@
POS_SDP_HEADER,
};
-extern unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr);
-extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp,
const char *dptr);
diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h
index 0d79b7a..c78d38f 100644
--- a/include/linux/netfilter/nf_conntrack_tftp.h
+++ b/include/linux/netfilter/nf_conntrack_tftp.h
@@ -13,7 +13,7 @@
#define TFTP_OPCODE_ACK 4
#define TFTP_OPCODE_ERROR 5
-extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp);
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 64f425a8..03e6ce9 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -191,7 +191,7 @@
/* Returns verdict. Argument order changed since 2.6.9, as this
must now handle non-linear skbs, using skb_copy_bits and
skb_ip_make_writable. */
- unsigned int (*target)(struct sk_buff **pskb,
+ unsigned int (*target)(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 584cd1b..2fc73fa 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -287,7 +287,7 @@
extern int arpt_register_table(struct arpt_table *table,
const struct arpt_replace *repl);
extern void arpt_unregister_table(struct arpt_table *table);
-extern unsigned int arpt_do_table(struct sk_buff **pskb,
+extern unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 94e0a7d..892f5b7 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -237,7 +237,7 @@
struct list_head list;
const char name[EBT_FUNCTION_MAXNAMELEN];
/* returns one of the standard verdicts */
- int (*target)(struct sk_buff **pskb, unsigned int hooknr,
+ int (*target)(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *targetdata, unsigned int datalen);
/* 0 == let it in */
@@ -294,7 +294,7 @@
extern void ebt_unregister_watcher(struct ebt_watcher *watcher);
extern int ebt_register_target(struct ebt_target *target);
extern void ebt_unregister_target(struct ebt_target *target);
-extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff **pskb,
+extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
struct ebt_table *table);
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index ceae87a..1a63adf 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -75,8 +75,8 @@
#define SO_ORIGINAL_DST 80
#ifdef __KERNEL__
-extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
-extern int ip_xfrm_me_harder(struct sk_buff **pskb);
+extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
+extern int ip_xfrm_me_harder(struct sk_buff *skb);
extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
#endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index e992cd6..d79ed69 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -337,7 +337,7 @@
.target.errorname = "ERROR", \
}
-extern unsigned int ipt_do_table(struct sk_buff **pskb,
+extern unsigned int ipt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 9a720f0..7dc481c 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -336,7 +336,7 @@
extern int ip6t_register_table(struct xt_table *table,
const struct ip6t_replace *repl);
extern void ip6t_unregister_table(struct xt_table *table);
-extern unsigned int ip6t_do_table(struct sk_buff **pskb,
+extern unsigned int ip6t_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7250eea..c5164c2 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -47,10 +47,8 @@
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_xdr.h>
-
#include <linux/nfs_fs_sb.h>
-#include <linux/rwsem.h>
#include <linux/mempool.h>
/*
@@ -77,6 +75,9 @@
struct nfs4_state *state;
fl_owner_t lockowner;
int mode;
+
+ unsigned long flags;
+#define NFS_CONTEXT_ERROR_WRITE (0)
int error;
struct list_head list;
@@ -133,11 +134,6 @@
* server.
*/
unsigned long cache_change_attribute;
- /*
- * Counter indicating the number of outstanding requests that
- * will cause a file data update.
- */
- atomic_t data_updates;
struct rb_root access_cache;
struct list_head access_cache_entry_lru;
@@ -205,27 +201,18 @@
#define NFS_CLIENT(inode) (NFS_SERVER(inode)->client)
#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops)
#define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf)
-#define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies)
-#define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr)
-#define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo)
#define NFS_MINATTRTIMEO(inode) \
(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
: NFS_SERVER(inode)->acregmin)
#define NFS_MAXATTRTIMEO(inode) \
(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
: NFS_SERVER(inode)->acregmax)
-#define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp)
#define NFS_FLAGS(inode) (NFS_I(inode)->flags)
#define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
#define NFS_FILEID(inode) (NFS_I(inode)->fileid)
-static inline int nfs_caches_unstable(struct inode *inode)
-{
- return atomic_read(&NFS_I(inode)->data_updates) != 0;
-}
-
static inline void nfs_mark_for_revalidate(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -237,12 +224,6 @@
spin_unlock(&inode->i_lock);
}
-static inline void NFS_CACHEINV(struct inode *inode)
-{
- if (!nfs_caches_unstable(inode))
- nfs_mark_for_revalidate(inode);
-}
-
static inline int nfs_server_capable(struct inode *inode, int cap)
{
return NFS_SERVER(inode)->caps & cap;
@@ -253,28 +234,33 @@
return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
}
-/**
- * nfs_save_change_attribute - Returns the inode attribute change cookie
- * @inode - pointer to inode
- * The "change attribute" is updated every time we finish an operation
- * that will result in a metadata change on the server.
- */
-static inline long nfs_save_change_attribute(struct inode *inode)
+static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
{
- return NFS_I(inode)->cache_change_attribute;
+ dentry->d_time = verf;
}
/**
- * nfs_verify_change_attribute - Detects NFS inode cache updates
- * @inode - pointer to inode
- * @chattr - previously saved change attribute
- * Return "false" if metadata has been updated (or is in the process of
- * being updated) since the change attribute was saved.
+ * nfs_save_change_attribute - Returns the inode attribute change cookie
+ * @dir - pointer to parent directory inode
+ * The "change attribute" is updated every time we finish an operation
+ * that will result in a metadata change on the server.
*/
-static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
+static inline unsigned long nfs_save_change_attribute(struct inode *dir)
{
- return !nfs_caches_unstable(inode)
- && time_after_eq(chattr, NFS_I(inode)->cache_change_attribute);
+ return NFS_I(dir)->cache_change_attribute;
+}
+
+/**
+ * nfs_verify_change_attribute - Detects NFS remote directory changes
+ * @dir - pointer to parent directory inode
+ * @chattr - previously saved change attribute
+ * Return "false" if the verifiers doesn't match the change attribute.
+ * This would usually indicate that the directory contents have changed on
+ * the server, and that any dentries need revalidating.
+ */
+static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr)
+{
+ return chattr == NFS_I(dir)->cache_change_attribute;
}
/*
@@ -283,15 +269,14 @@
extern int nfs_sync_mapping(struct address_space *mapping);
extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping);
extern void nfs_zap_caches(struct inode *);
+extern void nfs_invalidate_atime(struct inode *);
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
struct nfs_fattr *);
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
+extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int nfs_permission(struct inode *, int, struct nameidata *);
-extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
-extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
-extern void nfs_access_zap_cache(struct inode *inode);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);
extern int nfs_attribute_timeout(struct inode *inode);
@@ -301,13 +286,10 @@
extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
extern int nfs_setattr(struct dentry *, struct iattr *);
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
-extern void nfs_begin_attr_update(struct inode *);
-extern void nfs_end_attr_update(struct inode *);
-extern void nfs_begin_data_update(struct inode *);
-extern void nfs_end_data_update(struct inode *);
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
extern void put_nfs_open_context(struct nfs_open_context *ctx);
extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
+extern u64 nfs_compat_user_ino64(u64 fileid);
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
extern __be32 root_nfs_parse_addr(char *name); /*__init*/
@@ -328,14 +310,15 @@
extern const struct file_operations nfs_file_operations;
extern const struct address_space_operations nfs_file_aops;
+static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
+{
+ return filp->private_data;
+}
+
static inline struct rpc_cred *nfs_file_cred(struct file *file)
{
- if (file != NULL) {
- struct nfs_open_context *ctx;
-
- ctx = (struct nfs_open_context*)file->private_data;
- return ctx->cred;
- }
+ if (file != NULL)
+ return nfs_file_open_context(file)->cred;
return NULL;
}
@@ -378,6 +361,8 @@
extern struct dentry_operations nfs_dentry_operations;
extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
+extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
+extern void nfs_access_zap_cache(struct inode *inode);
/*
* linux/fs/nfs/symlink.c
@@ -420,15 +405,14 @@
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
extern void nfs_writedata_release(void *);
-extern int nfs_set_page_dirty(struct page *);
/*
* Try to write back everything synchronously (but check the
* return value!)
*/
extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
-extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int);
extern int nfs_wb_all(struct inode *inode);
+extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 78e6079..30dbcc1 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -30,7 +30,6 @@
#define PG_BUSY 0
#define PG_NEED_COMMIT 1
#define PG_NEED_RESCHED 2
-#define PG_NEED_FLUSH 3
struct nfs_inode;
struct nfs_page {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index cf74a4d..daab252 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -62,7 +62,8 @@
#define NFS_ATTR_FATTR 0x0002 /* post-op attributes */
#define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */
#define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */
-#define NFS_ATTR_FATTR_V4_REFERRAL 0x0010 /* NFSv4 referral */
+#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */
+#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */
/*
* Info on the file system
@@ -538,10 +539,13 @@
struct nfs4_accessargs {
const struct nfs_fh * fh;
+ const u32 * bitmask;
u32 access;
};
struct nfs4_accessres {
+ const struct nfs_server * server;
+ struct nfs_fattr * fattr;
u32 supported;
u32 access;
};
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fe17d7d..76c1a53 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,6 +41,7 @@
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>
+#include <linux/lockdep.h>
/**
* struct rcu_head - callback structure for use with RCU
@@ -133,6 +134,15 @@
extern int rcu_pending(int cpu);
extern int rcu_needs_cpu(int cpu);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire() lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
+# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire() do { } while (0)
+# define rcu_read_release() do { } while (0)
+#endif
+
/**
* rcu_read_lock - mark the beginning of an RCU read-side critical section.
*
@@ -166,6 +176,7 @@
do { \
preempt_disable(); \
__acquire(RCU); \
+ rcu_read_acquire(); \
} while(0)
/**
@@ -175,6 +186,7 @@
*/
#define rcu_read_unlock() \
do { \
+ rcu_read_release(); \
__release(RCU); \
preempt_enable(); \
} while(0)
@@ -204,6 +216,7 @@
do { \
local_bh_disable(); \
__acquire(RCU_BH); \
+ rcu_read_acquire(); \
} while(0)
/*
@@ -213,6 +226,7 @@
*/
#define rcu_read_unlock_bh() \
do { \
+ rcu_read_release(); \
__release(RCU_BH); \
local_bh_enable(); \
} while(0)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a656cec..f93f22b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -357,6 +357,7 @@
}
extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
extern struct sk_buff *skb_clone(struct sk_buff *skb,
gfp_t priority);
extern struct sk_buff *skb_copy(const struct sk_buff *skb,
@@ -1781,6 +1782,11 @@
return skb_shinfo(skb)->gso_size;
}
+static inline int skb_is_gso_v6(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
+}
+
static inline void skb_forward_csum(struct sk_buff *skb)
{
/* Unfortunately we don't support this one. Any brave souls? */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c0d9d14..d9d5c5a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -117,7 +117,7 @@
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
- struct rpc_program *, int);
+ struct rpc_program *, u32);
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
void rpc_shutdown_client(struct rpc_clnt *);
void rpc_release_client(struct rpc_clnt *);
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 3912cf1..3347c72 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -88,6 +88,11 @@
CTL_SLOTTABLE_TCP,
CTL_MIN_RESVPORT,
CTL_MAX_RESVPORT,
+ CTL_SLOTTABLE_RDMA,
+ CTL_RDMA_MAXINLINEREAD,
+ CTL_RDMA_MAXINLINEWRITE,
+ CTL_RDMA_WRITEPADDING,
+ CTL_RDMA_MEMREG,
};
#endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 784d4c3..c4beb57 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -138,6 +138,19 @@
#define RPC_MAX_HEADER_WITH_AUTH \
(RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4))
+/*
+ * RFC1833/RFC3530 rpcbind (v3+) well-known netid's.
+ */
+#define RPCBIND_NETID_UDP "udp"
+#define RPCBIND_NETID_TCP "tcp"
+#define RPCBIND_NETID_UDP6 "udp6"
+#define RPCBIND_NETID_TCP6 "tcp6"
+
+/*
+ * Note that RFC 1833 does not put any size restrictions on the
+ * netid string, but all currently defined netid's fit in 4 bytes.
+ */
+#define RPCBIND_MAXNETIDLEN (4u)
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_MSGPROT_H_ */
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
new file mode 100644
index 0000000..0013a0d
--- /dev/null
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_RPC_RDMA_H
+#define _LINUX_SUNRPC_RPC_RDMA_H
+
+struct rpcrdma_segment {
+ uint32_t rs_handle; /* Registered memory handle */
+ uint32_t rs_length; /* Length of the chunk in bytes */
+ uint64_t rs_offset; /* Chunk virtual address or offset */
+};
+
+/*
+ * read chunk(s), encoded as a linked list.
+ */
+struct rpcrdma_read_chunk {
+ uint32_t rc_discrim; /* 1 indicates presence */
+ uint32_t rc_position; /* Position in XDR stream */
+ struct rpcrdma_segment rc_target;
+};
+
+/*
+ * write chunk, and reply chunk.
+ */
+struct rpcrdma_write_chunk {
+ struct rpcrdma_segment wc_target;
+};
+
+/*
+ * write chunk(s), encoded as a counted array.
+ */
+struct rpcrdma_write_array {
+ uint32_t wc_discrim; /* 1 indicates presence */
+ uint32_t wc_nchunks; /* Array count */
+ struct rpcrdma_write_chunk wc_array[0];
+};
+
+struct rpcrdma_msg {
+ uint32_t rm_xid; /* Mirrors the RPC header xid */
+ uint32_t rm_vers; /* Version of this protocol */
+ uint32_t rm_credit; /* Buffers requested/granted */
+ uint32_t rm_type; /* Type of message (enum rpcrdma_proc) */
+ union {
+
+ struct { /* no chunks */
+ uint32_t rm_empty[3]; /* 3 empty chunk lists */
+ } rm_nochunks;
+
+ struct { /* no chunks and padded */
+ uint32_t rm_align; /* Padding alignment */
+ uint32_t rm_thresh; /* Padding threshold */
+ uint32_t rm_pempty[3]; /* 3 empty chunk lists */
+ } rm_padded;
+
+ uint32_t rm_chunks[0]; /* read, write and reply chunks */
+
+ } rm_body;
+};
+
+#define RPCRDMA_HDRLEN_MIN 28
+
+enum rpcrdma_errcode {
+ ERR_VERS = 1,
+ ERR_CHUNK = 2
+};
+
+struct rpcrdma_err_vers {
+ uint32_t rdma_vers_low; /* Version range supported by peer */
+ uint32_t rdma_vers_high;
+};
+
+enum rpcrdma_proc {
+ RDMA_MSG = 0, /* An RPC call or reply msg */
+ RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */
+ RDMA_MSGP = 2, /* An RPC call or reply msg with padding */
+ RDMA_DONE = 3, /* Client signals reply completion */
+ RDMA_ERROR = 4 /* An RPC RDMA encoding error */
+};
+
+#endif /* _LINUX_SUNRPC_RPC_RDMA_H */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index c6b53d1..0751c94 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -70,7 +70,10 @@
struct page ** pages; /* Array of contiguous pages */
unsigned int page_base, /* Start of page data */
- page_len; /* Length of page data */
+ page_len, /* Length of page data */
+ flags; /* Flags for data disposition */
+#define XDRBUF_READ 0x01 /* target of file read */
+#define XDRBUF_WRITE 0x02 /* source of file write */
unsigned int buflen, /* Total length of storage buffer */
len; /* Length of XDR encoded message */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index d11cedd..30b17b3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -19,25 +19,11 @@
#ifdef __KERNEL__
-extern unsigned int xprt_udp_slot_table_entries;
-extern unsigned int xprt_tcp_slot_table_entries;
-
#define RPC_MIN_SLOT_TABLE (2U)
#define RPC_DEF_SLOT_TABLE (16U)
#define RPC_MAX_SLOT_TABLE (128U)
/*
- * Parameters for choosing a free port
- */
-extern unsigned int xprt_min_resvport;
-extern unsigned int xprt_max_resvport;
-
-#define RPC_MIN_RESVPORT (1U)
-#define RPC_MAX_RESVPORT (65535U)
-#define RPC_DEF_MIN_RESVPORT (665U)
-#define RPC_DEF_MAX_RESVPORT (1023U)
-
-/*
* This describes a timeout strategy
*/
struct rpc_timeout {
@@ -53,6 +39,10 @@
RPC_DISPLAY_PORT,
RPC_DISPLAY_PROTO,
RPC_DISPLAY_ALL,
+ RPC_DISPLAY_HEX_ADDR,
+ RPC_DISPLAY_HEX_PORT,
+ RPC_DISPLAY_UNIVERSAL_ADDR,
+ RPC_DISPLAY_NETID,
RPC_DISPLAY_MAX,
};
@@ -196,14 +186,22 @@
char * address_strings[RPC_DISPLAY_MAX];
};
-struct rpc_xprtsock_create {
- int proto; /* IPPROTO_UDP or IPPROTO_TCP */
+struct xprt_create {
+ int ident; /* XPRT_TRANSPORT identifier */
struct sockaddr * srcaddr; /* optional local address */
struct sockaddr * dstaddr; /* remote peer address */
size_t addrlen;
struct rpc_timeout * timeout; /* optional timeout parameters */
};
+struct xprt_class {
+ struct list_head list;
+ int ident; /* XPRT_TRANSPORT identifier */
+ struct rpc_xprt * (*setup)(struct xprt_create *);
+ struct module *owner;
+ char name[32];
+};
+
/*
* Transport operations used by ULPs
*/
@@ -212,7 +210,7 @@
/*
* Generic internal transport functions
*/
-struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args);
+struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_task *task);
@@ -235,6 +233,8 @@
/*
* Transport switch helper functions
*/
+int xprt_register_transport(struct xprt_class *type);
+int xprt_unregister_transport(struct xprt_class *type);
void xprt_set_retrans_timeout_def(struct rpc_task *task);
void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
@@ -248,14 +248,6 @@
void xprt_disconnect(struct rpc_xprt *xprt);
/*
- * Socket transport setup operations
- */
-struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args);
-struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args);
-int init_socket_xprt(void);
-void cleanup_socket_xprt(void);
-
-/*
* Reserved bit positions in xprt->state
*/
#define XPRT_LOCKED (0)
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
new file mode 100644
index 0000000..4de56b1
--- /dev/null
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRTRDMA_H
+#define _LINUX_SUNRPC_XPRTRDMA_H
+
+/*
+ * RPC transport identifier for RDMA
+ */
+#define XPRT_TRANSPORT_RDMA 256
+
+/*
+ * rpcbind (v3+) RDMA netid.
+ */
+#define RPCBIND_NETID_RDMA "rdma"
+
+/*
+ * Constants. Max RPC/NFS header is big enough to account for
+ * additional marshaling buffers passed down by Linux client.
+ *
+ * RDMA header is currently fixed max size, and is big enough for a
+ * fully-chunked NFS message (read chunks are the largest). Note only
+ * a single chunk type per message is supported currently.
+ */
+#define RPCRDMA_MIN_SLOT_TABLE (2U)
+#define RPCRDMA_DEF_SLOT_TABLE (32U)
+#define RPCRDMA_MAX_SLOT_TABLE (256U)
+
+#define RPCRDMA_DEF_INLINE (1024) /* default inline max */
+
+#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */
+
+#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+
+/* memory registration strategies */
+#define RPCRDMA_PERSISTENT_REGISTRATION (1)
+
+enum rpcrdma_memreg {
+ RPCRDMA_BOUNCEBUFFERS = 0,
+ RPCRDMA_REGISTER,
+ RPCRDMA_MEMWINDOWS,
+ RPCRDMA_MEMWINDOWS_ASYNC,
+ RPCRDMA_MTHCAFMR,
+ RPCRDMA_ALLPHYSICAL,
+ RPCRDMA_LAST
+};
+
+#endif /* _LINUX_SUNRPC_XPRTRDMA_H */
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
new file mode 100644
index 0000000..2c6c2c2
--- /dev/null
+++ b/include/linux/sunrpc/xprtsock.h
@@ -0,0 +1,51 @@
+/*
+ * linux/include/linux/sunrpc/xprtsock.h
+ *
+ * Declarations for the RPC transport socket provider.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRTSOCK_H
+#define _LINUX_SUNRPC_XPRTSOCK_H
+
+#ifdef __KERNEL__
+
+/*
+ * Socket transport setup operations
+ */
+struct rpc_xprt *xs_setup_udp(struct xprt_create *args);
+struct rpc_xprt *xs_setup_tcp(struct xprt_create *args);
+
+int init_socket_xprt(void);
+void cleanup_socket_xprt(void);
+
+/*
+ * RPC transport identifiers for UDP, TCP
+ *
+ * To preserve compatibility with the historical use of raw IP protocol
+ * id's for transport selection, these are specified with the previous
+ * values. No such restriction exists for new transports, except that
+ * they may not collide with these values (17 and 6, respectively).
+ */
+#define XPRT_TRANSPORT_UDP IPPROTO_UDP
+#define XPRT_TRANSPORT_TCP IPPROTO_TCP
+
+/*
+ * RPC slot table sizes for UDP, TCP transports
+ */
+extern unsigned int xprt_udp_slot_table_entries;
+extern unsigned int xprt_tcp_slot_table_entries;
+
+/*
+ * Parameters for choosing a free port
+ */
+extern unsigned int xprt_min_resvport;
+extern unsigned int xprt_max_resvport;
+
+#define RPC_MIN_RESVPORT (1U)
+#define RPC_MAX_RESVPORT (65535U)
+#define RPC_DEF_MIN_RESVPORT (665U)
+#define RPC_DEF_MAX_RESVPORT (1023U)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SUNRPC_XPRTSOCK_H */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index c5b94c1..bac17c5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -315,7 +315,7 @@
*/
u32 snd_ssthresh; /* Slow start size threshold */
u32 snd_cwnd; /* Sending congestion window */
- u16 snd_cwnd_cnt; /* Linear increase counter */
+ u32 snd_cwnd_cnt; /* Linear increase counter */
u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
u32 snd_cwnd_used;
u32 snd_cwnd_stamp;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c7c3337..d1321a8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -62,8 +62,6 @@
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned for_writepages:1; /* This is a writepages() call */
unsigned range_cyclic:1; /* range_start is cyclic */
-
- void *fs_private; /* For use by ->writepages() */
};
/*
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
new file mode 100644
index 0000000..911c2cd
--- /dev/null
+++ b/include/net/inet_frag.h
@@ -0,0 +1,60 @@
+#ifndef __NET_FRAG_H__
+#define __NET_FRAG_H__
+
+struct inet_frag_queue {
+ struct hlist_node list;
+ struct list_head lru_list; /* lru list member */
+ spinlock_t lock;
+ atomic_t refcnt;
+ struct timer_list timer; /* when will this queue expire? */
+ struct sk_buff *fragments; /* list of received fragments */
+ ktime_t stamp;
+ int len; /* total length of orig datagram */
+ int meat;
+ __u8 last_in; /* first/last segment arrived? */
+
+#define COMPLETE 4
+#define FIRST_IN 2
+#define LAST_IN 1
+};
+
+#define INETFRAGS_HASHSZ 64
+
+struct inet_frags_ctl {
+ int high_thresh;
+ int low_thresh;
+ int timeout;
+ int secret_interval;
+};
+
+struct inet_frags {
+ struct list_head lru_list;
+ struct hlist_head hash[INETFRAGS_HASHSZ];
+ rwlock_t lock;
+ u32 rnd;
+ int nqueues;
+ int qsize;
+ atomic_t mem;
+ struct timer_list secret_timer;
+ struct inet_frags_ctl *ctl;
+
+ unsigned int (*hashfn)(struct inet_frag_queue *);
+ void (*destructor)(struct inet_frag_queue *);
+ void (*skb_free)(struct sk_buff *);
+};
+
+void inet_frags_init(struct inet_frags *);
+void inet_frags_fini(struct inet_frags *);
+
+void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+void inet_frag_destroy(struct inet_frag_queue *q,
+ struct inet_frags *f, int *work);
+int inet_frag_evictor(struct inet_frags *f);
+
+static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+{
+ if (atomic_dec_and_test(&q->refcnt))
+ inet_frag_destroy(q, f, NULL);
+}
+
+#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index 3af3ed9..840dd91 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -160,6 +160,7 @@
#define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field)
#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field)
#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field)
+#define IP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val)
DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
#define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field)
#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field)
@@ -177,10 +178,8 @@
extern int sysctl_ip_nonlocal_bind;
/* From ip_fragment.c */
-extern int sysctl_ipfrag_high_thresh;
-extern int sysctl_ipfrag_low_thresh;
-extern int sysctl_ipfrag_time;
-extern int sysctl_ipfrag_secret_interval;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl ip4_frags_ctl;
extern int sysctl_ipfrag_max_dist;
/* From inetpeer.c */
@@ -332,9 +331,9 @@
IP_DEFRAG_VS_FWD
};
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
-extern int ip_frag_nqueues;
-extern atomic_t ip_frag_mem;
+int ip_defrag(struct sk_buff *skb, u32 user);
+int ip_frag_mem(void);
+int ip_frag_nqueues(void);
/*
* Functions provided by ip_forward.c
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 672564e..4187056 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -464,10 +464,10 @@
unsigned int proto_off,
int inverse);
- int (*snat_handler)(struct sk_buff **pskb,
+ int (*snat_handler)(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
- int (*dnat_handler)(struct sk_buff **pskb,
+ int (*dnat_handler)(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
@@ -654,11 +654,11 @@
/* output hook: return false if can't linearize. diff set for TCP. */
int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
- struct sk_buff **, int *diff);
+ struct sk_buff *, int *diff);
/* input hook: return false if can't linearize. diff set for TCP. */
int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
- struct sk_buff **, int *diff);
+ struct sk_buff *, int *diff);
/* ip_vs_app initializer */
int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
@@ -832,8 +832,8 @@
extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
-extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb);
-extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb);
+extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
+extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len);
extern int ip_vs_app_init(void);
@@ -984,7 +984,6 @@
return fwd;
}
-extern int ip_vs_make_skb_writable(struct sk_buff **pskb, int len);
extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int dir);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 31b3f1b..cc796cb 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -120,12 +120,21 @@
SNMP_INC_STATS##modifier(statname##_statistics, (field)); \
})
+#define _DEVADD(statname, modifier, idev, field, val) \
+({ \
+ struct inet6_dev *_idev = (idev); \
+ if (likely(_idev != NULL)) \
+ SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \
+ SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\
+})
+
/* MIBs */
DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
#define IP6_INC_STATS(idev,field) _DEVINC(ipv6, , idev, field)
#define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field)
#define IP6_INC_STATS_USER(idev,field) _DEVINC(ipv6, _USER, idev, field)
+#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val)
DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
@@ -240,7 +249,7 @@
void (*destructor)(struct sock *));
-extern int ipv6_parse_hopopts(struct sk_buff **skbp);
+extern int ipv6_parse_hopopts(struct sk_buff *skb);
extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt);
extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
@@ -252,8 +261,8 @@
extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);
-extern int ip6_frag_nqueues;
-extern atomic_t ip6_frag_mem;
+int ip6_frag_nqueues(void);
+int ip6_frag_mem(void);
#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */
@@ -565,10 +574,8 @@
/*
* reassembly.c
*/
-extern int sysctl_ip6frag_high_thresh;
-extern int sysctl_ip6frag_low_thresh;
-extern int sysctl_ip6frag_time;
-extern int sysctl_ip6frag_secret_interval;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl ip6_frags_ctl;
extern const struct proto_ops inet6_stream_ops;
extern const struct proto_ops inet6_dgram_ops;
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 070d12c..f703533 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -15,8 +15,7 @@
struct net_device *out,
int (*okfn)(struct sk_buff *));
-extern unsigned int nf_ct_frag6_timeout;
-extern unsigned int nf_ct_frag6_low_thresh;
-extern unsigned int nf_ct_frag6_high_thresh;
+struct inet_frags_ctl;
+extern struct inet_frags_ctl nf_frags_ctl;
#endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 4056f5f..a532e7b 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -22,7 +22,7 @@
of connection tracking. */
extern unsigned int nf_conntrack_in(int pf,
unsigned int hooknum,
- struct sk_buff **pskb);
+ struct sk_buff *skb);
extern int nf_conntrack_init(void);
extern void nf_conntrack_cleanup(void);
@@ -60,17 +60,17 @@
extern struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple);
-extern int __nf_conntrack_confirm(struct sk_buff **pskb);
+extern int __nf_conntrack_confirm(struct sk_buff *skb);
/* Confirm a connection: returns NF_DROP if packet must be dropped. */
-static inline int nf_conntrack_confirm(struct sk_buff **pskb)
+static inline int nf_conntrack_confirm(struct sk_buff *skb)
{
- struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct;
+ struct nf_conn *ct = (struct nf_conn *)skb->nfct;
int ret = NF_ACCEPT;
if (ct) {
if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
- ret = __nf_conntrack_confirm(pskb);
+ ret = __nf_conntrack_confirm(skb);
nf_ct_deliver_cached_events(ct);
}
return ret;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 0dcc4c8..d7b2d54 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -29,7 +29,7 @@
/* Function to call when data passes; return verdict, or -1 to
invalidate. */
- int (*help)(struct sk_buff **pskb,
+ int (*help)(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info conntrackinfo);
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index c3cd127..f29eeb9 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -10,12 +10,12 @@
extern unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
- struct sk_buff **pskb);
+ struct sk_buff *skb);
extern int nf_nat_icmp_reply_translation(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
- struct sk_buff **pskb);
+ struct sk_buff *skb);
static inline int nf_nat_initialized(struct nf_conn *ct,
enum nf_nat_manip_type manip)
diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h
index ec98ecf..58dd226 100644
--- a/include/net/netfilter/nf_nat_helper.h
+++ b/include/net/netfilter/nf_nat_helper.h
@@ -7,21 +7,21 @@
struct sk_buff;
/* These return true or false. */
-extern int nf_nat_mangle_tcp_packet(struct sk_buff **skb,
+extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
unsigned int match_len,
const char *rep_buffer,
unsigned int rep_len);
-extern int nf_nat_mangle_udp_packet(struct sk_buff **skb,
+extern int nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
unsigned int match_len,
const char *rep_buffer,
unsigned int rep_len);
-extern int nf_nat_seq_adjust(struct sk_buff **pskb,
+extern int nf_nat_seq_adjust(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo);
diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index 14c7b2d..04578bf 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -18,7 +18,7 @@
/* Translate a packet to the target according to manip type.
Return true if succeeded. */
- int (*manip_pkt)(struct sk_buff **pskb,
+ int (*manip_pkt)(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype);
diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index f974318..75d1825 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -6,7 +6,7 @@
extern int nf_nat_rule_init(void) __init;
extern void nf_nat_rule_cleanup(void);
-extern int nf_nat_rule_find(struct sk_buff **pskb,
+extern int nf_nat_rule_find(struct sk_buff *skb,
unsigned int hooknum,
const struct net_device *in,
const struct net_device *out,
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 105bf12..1166ffb 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -45,7 +45,7 @@
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
struct inet6_protocol
{
- int (*handler)(struct sk_buff **skb);
+ int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb,
struct inet6_skb_parm *opt,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 77be396..0e84484 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1051,7 +1051,7 @@
extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi);
-extern int xfrm6_rcv(struct sk_buff **pskb);
+extern int xfrm6_rcv(struct sk_buff *skb);
extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto);
extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 04f3ffb..0ae703c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1525,6 +1525,7 @@
context->names[idx].ino = (unsigned long)-1;
}
}
+EXPORT_SYMBOL_GPL(__audit_inode_child);
/**
* auditsc_get_stamp - get local copies of audit_context values
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 734da57..a6f1ee9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1521,7 +1521,7 @@
}
static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
- struct held_lock *hlock, int chain_head)
+ struct held_lock *hlock, int chain_head, u64 chain_key)
{
/*
* Trylock needs to maintain the stack of held locks, but it
@@ -1534,7 +1534,7 @@
* graph_lock for us)
*/
if (!hlock->trylock && (hlock->check == 2) &&
- lookup_chain_cache(curr->curr_chain_key, hlock->class)) {
+ lookup_chain_cache(chain_key, hlock->class)) {
/*
* Check whether last held lock:
*
@@ -1576,7 +1576,7 @@
#else
static inline int validate_chain(struct task_struct *curr,
struct lockdep_map *lock, struct held_lock *hlock,
- int chain_head)
+ int chain_head, u64 chain_key)
{
return 1;
}
@@ -2450,11 +2450,11 @@
chain_head = 1;
}
chain_key = iterate_chain_key(chain_key, id);
- curr->curr_chain_key = chain_key;
- if (!validate_chain(curr, lock, hlock, chain_head))
+ if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
return 0;
+ curr->curr_chain_key = chain_key;
curr->lockdep_depth++;
check_chain_key(curr);
#ifdef CONFIG_DEBUG_LOCKDEP
@@ -3199,3 +3199,19 @@
}
EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
+void lockdep_sys_exit(void)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(curr->lockdep_depth)) {
+ if (!debug_locks_off())
+ return;
+ printk("\n================================================\n");
+ printk( "[ BUG: lock held when returning to user space! ]\n");
+ printk( "------------------------------------------------\n");
+ printk("%s/%d is leaving the kernel with locks still held!\n",
+ curr->comm, curr->pid);
+ lockdep_print_held_locks(curr);
+ }
+}
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index c851b2d..8a135bd 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -25,28 +25,38 @@
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct lock_class *class = v;
+ struct lock_class *class;
(*pos)++;
- if (class->lock_entry.next != &all_lock_classes)
- class = list_entry(class->lock_entry.next, struct lock_class,
- lock_entry);
- else
- class = NULL;
- m->private = class;
+ if (v == SEQ_START_TOKEN)
+ class = m->private;
+ else {
+ class = v;
+
+ if (class->lock_entry.next != &all_lock_classes)
+ class = list_entry(class->lock_entry.next,
+ struct lock_class, lock_entry);
+ else
+ class = NULL;
+ }
return class;
}
static void *l_start(struct seq_file *m, loff_t *pos)
{
- struct lock_class *class = m->private;
+ struct lock_class *class;
+ loff_t i = 0;
- if (&class->lock_entry == all_lock_classes.next)
- seq_printf(m, "all lock classes:\n");
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- return class;
+ list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ if (++i == *pos)
+ return class;
+ }
+ return NULL;
}
static void l_stop(struct seq_file *m, void *v)
@@ -101,10 +111,15 @@
static int l_show(struct seq_file *m, void *v)
{
unsigned long nr_forward_deps, nr_backward_deps;
- struct lock_class *class = m->private;
+ struct lock_class *class = v;
struct lock_list *entry;
char c1, c2, c3, c4;
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(m, "all lock classes:\n");
+ return 0;
+ }
+
seq_printf(m, "%p", class->key);
#ifdef CONFIG_DEBUG_LOCKDEP
seq_printf(m, " OPS:%8ld", class->ops);
@@ -523,10 +538,11 @@
{
struct lock_stat_seq *data = m->private;
- if (data->iter == data->stats)
- seq_header(m);
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- if (data->iter == data->iter_end)
+ data->iter = data->stats + *pos;
+ if (data->iter >= data->iter_end)
data->iter = NULL;
return data->iter;
@@ -538,8 +554,13 @@
(*pos)++;
- data->iter = v;
- data->iter++;
+ if (v == SEQ_START_TOKEN)
+ data->iter = data->stats;
+ else {
+ data->iter = v;
+ data->iter++;
+ }
+
if (data->iter == data->iter_end)
data->iter = NULL;
@@ -552,9 +573,11 @@
static int ls_show(struct seq_file *m, void *v)
{
- struct lock_stat_seq *data = m->private;
+ if (v == SEQ_START_TOKEN)
+ seq_header(m);
+ else
+ seq_stats(m, v);
- seq_stats(m, data->iter);
return 0;
}
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 691b865..d7fe50c 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -51,6 +51,7 @@
EXPORT_SYMBOL(__mutex_init);
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
* slowpath functions, to reduce the register pressure on the fastpath.
@@ -92,6 +93,7 @@
}
EXPORT_SYMBOL(mutex_lock);
+#endif
static void fastcall noinline __sched
__mutex_unlock_slowpath(atomic_t *lock_count);
@@ -122,7 +124,8 @@
* Lock a mutex (possibly interruptible), slowpath:
*/
static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+ unsigned long ip)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
@@ -132,7 +135,7 @@
spin_lock_mutex(&lock->wait_lock, flags);
debug_mutex_lock_common(lock, &waiter);
- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ mutex_acquire(&lock->dep_map, subclass, 0, ip);
debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -143,7 +146,7 @@
if (old_val == 1)
goto done;
- lock_contended(&lock->dep_map, _RET_IP_);
+ lock_contended(&lock->dep_map, ip);
for (;;) {
/*
@@ -166,7 +169,7 @@
if (unlikely(state == TASK_INTERRUPTIBLE &&
signal_pending(task))) {
mutex_remove_waiter(lock, &waiter, task_thread_info(task));
- mutex_release(&lock->dep_map, 1, _RET_IP_);
+ mutex_release(&lock->dep_map, 1, ip);
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
@@ -197,20 +200,12 @@
return 0;
}
-static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
-{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
-
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
-}
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -219,7 +214,7 @@
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -271,6 +266,7 @@
__mutex_unlock_common_slowpath(lock_count, 1);
}
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* Here come the less common (and hence less performance-critical) APIs:
* mutex_lock_interruptible() and mutex_trylock().
@@ -298,13 +294,22 @@
EXPORT_SYMBOL(mutex_lock_interruptible);
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count)
+{
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+}
+
static int fastcall noinline __sched
__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
}
+#endif
/*
* Spinlock based trylock, we take the spinlock and check whether we
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c2dd84..130214f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,6 +49,14 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
.cur = -300,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 848b8fa..93867bb 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -23,7 +23,7 @@
#include "br_private.h"
-int (*br_should_route_hook) (struct sk_buff **pskb) = NULL;
+int (*br_should_route_hook)(struct sk_buff *skb);
static struct llc_sap *br_stp_sap;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3a8a015..3cedd4e 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -126,6 +126,10 @@
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
goto drop;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
if (unlikely(is_link_local(dest))) {
/* Pause frames shouldn't be passed up by driver anyway */
if (skb->protocol == htons(ETH_P_PAUSE))
@@ -145,7 +149,7 @@
case BR_STATE_FORWARDING:
if (br_should_route_hook) {
- if (br_should_route_hook(&skb))
+ if (br_should_route_hook(skb))
return skb;
dest = eth_hdr(skb)->h_dest;
}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8245f05..da22f90 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -503,18 +503,14 @@
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
- struct sk_buff *skb = *pskb;
__u32 len = nf_bridge_encap_header_len(skb);
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
- return NF_STOLEN;
-
if (unlikely(!pskb_may_pull(skb, len)))
goto out;
@@ -584,13 +580,11 @@
* took place when the packet entered the bridge), but we
* register an IPv4 PRE_ROUTING 'sabotage' hook that will
* prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
-
if (skb->dst == (struct dst_entry *)&__fake_rtable) {
dst_release(skb->dst);
skb->dst = NULL;
@@ -625,12 +619,11 @@
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
struct net_device *parent;
int pf;
@@ -648,7 +641,7 @@
else
pf = PF_INET6;
- nf_bridge_pull_encap_header(*pskb);
+ nf_bridge_pull_encap_header(skb);
nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -666,12 +659,11 @@
return NF_STOLEN;
}
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct net_device **d = (struct net_device **)(skb->cb);
#ifdef CONFIG_SYSCTL
@@ -682,12 +674,12 @@
if (skb->protocol != htons(ETH_P_ARP)) {
if (!IS_VLAN_ARP(skb))
return NF_ACCEPT;
- nf_bridge_pull_encap_header(*pskb);
+ nf_bridge_pull_encap_header(skb);
}
if (arp_hdr(skb)->ar_pln != 4) {
if (IS_VLAN_ARP(skb))
- nf_bridge_push_encap_header(*pskb);
+ nf_bridge_push_encap_header(skb);
return NF_ACCEPT;
}
*d = (struct net_device *)in;
@@ -709,13 +701,12 @@
* NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
* will be executed.
*/
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct net_device *realindev;
- struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
if (!skb->nf_bridge)
@@ -752,13 +743,12 @@
}
/* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
- struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
+ struct nf_bridge_info *nf_bridge = skb->nf_bridge;
struct net_device *realoutdev = bridge_parent(skb->dev);
int pf;
@@ -828,13 +818,13 @@
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if ((*pskb)->nf_bridge &&
- !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ if (skb->nf_bridge &&
+ !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
return NF_STOP;
}
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index ffe468a..48a80e4 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -15,7 +15,7 @@
#include <net/arp.h>
#include <linux/module.h>
-static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
@@ -23,7 +23,6 @@
__be32 _sip, *siptr, _dip, *diptr;
struct arphdr _ah, *ap;
unsigned char _sha[ETH_ALEN], *shp;
- struct sk_buff *skb = *pskb;
ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
if (ap == NULL)
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4582659..74262e9 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -8,29 +8,22 @@
*
*/
+#include <linux/netfilter.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_nat.h>
#include <linux/module.h>
#include <net/sock.h>
-static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
struct ebt_nat_info *info = (struct ebt_nat_info *)data;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, 0))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
- memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN);
+ memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN);
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 62d23c7..6cba543 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -17,7 +17,7 @@
#include <linux/netfilter_bridge/ebt_mark_t.h>
#include <linux/module.h>
-static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
@@ -25,13 +25,13 @@
int action = info->target & -16;
if (action == MARK_SET_VALUE)
- (*pskb)->mark = info->mark;
+ skb->mark = info->mark;
else if (action == MARK_OR_VALUE)
- (*pskb)->mark |= info->mark;
+ skb->mark |= info->mark;
else if (action == MARK_AND_VALUE)
- (*pskb)->mark &= info->mark;
+ skb->mark &= info->mark;
else
- (*pskb)->mark ^= info->mark;
+ skb->mark ^= info->mark;
return info->target | ~EBT_VERDICT_BITS;
}
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9f378ea..422cb83 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -8,35 +8,28 @@
*
*/
+#include <linux/netfilter.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_redirect.h>
#include <linux/module.h>
#include <net/sock.h>
#include "../br_private.h"
-static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
struct ebt_redirect_info *info = (struct ebt_redirect_info *)data;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, 0))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
if (hooknr != NF_BR_BROUTING)
- memcpy(eth_hdr(*pskb)->h_dest,
+ memcpy(eth_hdr(skb)->h_dest,
in->br_port->br->dev->dev_addr, ETH_ALEN);
else
- memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN);
- (*pskb)->pkt_type = PACKET_HOST;
+ memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN);
+ skb->pkt_type = PACKET_HOST;
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index a507221..425ac92 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -8,6 +8,7 @@
*
*/
+#include <linux/netfilter.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_nat.h>
#include <linux/module.h>
@@ -15,34 +16,26 @@
#include <linux/if_arp.h>
#include <net/arp.h>
-static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
struct ebt_nat_info *info = (struct ebt_nat_info *) data;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, 0))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
- memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN);
+ memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
if (!(info->target & NAT_ARP_BIT) &&
- eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) {
+ eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
struct arphdr _ah, *ap;
- ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah);
+ ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
if (ap == NULL)
return EBT_DROP;
if (ap->ar_hln != ETH_ALEN)
goto out;
- if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN))
+ if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN))
return EBT_DROP;
}
out:
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d37ce04..e44519e 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -51,11 +51,11 @@
.me = THIS_MODULE,
};
-static int ebt_broute(struct sk_buff **pskb)
+static int ebt_broute(struct sk_buff *skb)
{
int ret;
- ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL,
+ ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
&broute_table);
if (ret == NF_DROP)
return 1; /* route it */
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 81d8414..210493f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -61,10 +61,10 @@
};
static unsigned int
-ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, pskb, in, out, &frame_filter);
+ return ebt_do_table(hook, skb, in, out, &frame_filter);
}
static struct nf_hook_ops ebt_ops_filter[] = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 9c50488..3e58c2e 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -61,17 +61,17 @@
};
static unsigned int
-ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in
, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ return ebt_do_table(hook, skb, in, out, &frame_nat);
}
static unsigned int
-ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ return ebt_do_table(hook, skb, in, out, &frame_nat);
}
static struct nf_hook_ops ebt_ops_nat[] = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6018d0e..d5a09ea 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -142,7 +142,7 @@
}
/* Do some firewalling */
-unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
+unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
struct ebt_table *table)
{
@@ -172,19 +172,19 @@
base = private->entries;
i = 0;
while (i < nentries) {
- if (ebt_basic_match(point, eth_hdr(*pskb), in, out))
+ if (ebt_basic_match(point, eth_hdr(skb), in, out))
goto letscontinue;
- if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0)
+ if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0)
goto letscontinue;
/* increase counter */
(*(counter_base + i)).pcnt++;
- (*(counter_base + i)).bcnt+=(**pskb).len;
+ (*(counter_base + i)).bcnt += skb->len;
/* these should only watch: not modify, nor tell us
what to do with the packet */
- EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in,
+ EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in,
out);
t = (struct ebt_entry_target *)
@@ -193,7 +193,7 @@
if (!t->u.target->target)
verdict = ((struct ebt_standard_target *)t)->verdict;
else
- verdict = t->u.target->target(pskb, hook,
+ verdict = t->u.target->target(skb, hook,
in, out, t->data, t->target_size);
if (verdict == EBT_ACCEPT) {
read_unlock_bh(&table->lock);
diff --git a/net/core/dev.c b/net/core/dev.c
index 99b7bda..38b03da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1362,22 +1362,21 @@
goto out_set_summed;
}
- if (skb_cloned(skb)) {
+ offset = skb->csum_start - skb_headroom(skb);
+ BUG_ON(offset >= skb_headlen(skb));
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+ offset += skb->csum_offset;
+ BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, offset + sizeof(__sum16))) {
ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
if (ret)
goto out;
}
- offset = skb->csum_start - skb_headroom(skb);
- BUG_ON(offset > (int)skb->len);
- csum = skb_checksum(skb, offset, skb->len-offset, 0);
-
- offset = skb_headlen(skb) - offset;
- BUG_ON(offset <= 0);
- BUG_ON(skb->csum_offset + 2 > offset);
-
- *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
- csum_fold(csum);
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
@@ -1949,28 +1948,52 @@
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;
+ u32 ttl = G_TC_RTTL(skb->tc_verd);
- if (dev->qdisc_ingress) {
- __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
- if (MAX_RED_LOOP < ttl++) {
- printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
- skb->iif, skb->dev->ifindex);
- return TC_ACT_SHOT;
- }
-
- skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
-
- skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
-
- spin_lock(&dev->ingress_lock);
- if ((q = dev->qdisc_ingress) != NULL)
- result = q->enqueue(skb, q);
- spin_unlock(&dev->ingress_lock);
-
+ if (MAX_RED_LOOP < ttl++) {
+ printk(KERN_WARNING
+ "Redir loop detected Dropping packet (%d->%d)\n",
+ skb->iif, dev->ifindex);
+ return TC_ACT_SHOT;
}
+ skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+
+ spin_lock(&dev->ingress_lock);
+ if ((q = dev->qdisc_ingress) != NULL)
+ result = q->enqueue(skb, q);
+ spin_unlock(&dev->ingress_lock);
+
return result;
}
+
+static inline struct sk_buff *handle_ing(struct sk_buff *skb,
+ struct packet_type **pt_prev,
+ int *ret, struct net_device *orig_dev)
+{
+ if (!skb->dev->qdisc_ingress)
+ goto out;
+
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ } else {
+ /* Huh? Why does turning on AF_PACKET affect this? */
+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+ }
+
+ switch (ing_filter(skb)) {
+ case TC_ACT_SHOT:
+ case TC_ACT_STOLEN:
+ kfree_skb(skb);
+ return NULL;
+ }
+
+out:
+ skb->tc_verd = 0;
+ return skb;
+}
#endif
int netif_receive_skb(struct sk_buff *skb)
@@ -2021,21 +2044,9 @@
}
#ifdef CONFIG_NET_CLS_ACT
- if (pt_prev) {
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = NULL; /* noone else should process this after*/
- } else {
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
- }
-
- ret = ing_filter(skb);
-
- if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
- kfree_skb(skb);
+ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
goto out;
- }
-
- skb->tc_verd = 0;
ncls:
#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c52df85..cd3af59 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -481,6 +481,8 @@
if (!creat)
goto out;
+ ASSERT_RTNL();
+
n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
if (!n)
goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 944189d..70d9b5d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -362,6 +362,97 @@
__kfree_skb(skb);
}
+static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+ new->tstamp = old->tstamp;
+ new->dev = old->dev;
+ new->transport_header = old->transport_header;
+ new->network_header = old->network_header;
+ new->mac_header = old->mac_header;
+ new->dst = dst_clone(old->dst);
+#ifdef CONFIG_INET
+ new->sp = secpath_get(old->sp);
+#endif
+ memcpy(new->cb, old->cb, sizeof(old->cb));
+ new->csum_start = old->csum_start;
+ new->csum_offset = old->csum_offset;
+ new->local_df = old->local_df;
+ new->pkt_type = old->pkt_type;
+ new->ip_summed = old->ip_summed;
+ skb_copy_queue_mapping(new, old);
+ new->priority = old->priority;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ new->ipvs_property = old->ipvs_property;
+#endif
+ new->protocol = old->protocol;
+ new->mark = old->mark;
+ __nf_copy(new, old);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+ new->nf_trace = old->nf_trace;
+#endif
+#ifdef CONFIG_NET_SCHED
+ new->tc_index = old->tc_index;
+#ifdef CONFIG_NET_CLS_ACT
+ new->tc_verd = old->tc_verd;
+#endif
+#endif
+ skb_copy_secmark(new, old);
+}
+
+static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
+{
+#define C(x) n->x = skb->x
+
+ n->next = n->prev = NULL;
+ n->sk = NULL;
+ __copy_skb_header(n, skb);
+
+ C(len);
+ C(data_len);
+ C(mac_len);
+ n->cloned = 1;
+ n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+ n->nohdr = 0;
+ n->destructor = NULL;
+#ifdef CONFIG_NET_CLS_ACT
+ /* FIXME What is this and why don't we do it in copy_skb_header? */
+ n->tc_verd = SET_TC_VERD(n->tc_verd,0);
+ n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+ n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
+ C(iif);
+#endif
+ C(truesize);
+ atomic_set(&n->users, 1);
+ C(head);
+ C(data);
+ C(tail);
+ C(end);
+
+ atomic_inc(&(skb_shinfo(skb)->dataref));
+ skb->cloned = 1;
+
+ return n;
+#undef C
+}
+
+/**
+ * skb_morph - morph one skb into another
+ * @dst: the skb to receive the contents
+ * @src: the skb to supply the contents
+ *
+ * This is identical to skb_clone except that the target skb is
+ * supplied by the user.
+ *
+ * The target skb is returned upon exit.
+ */
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
+{
+ skb_release_data(dst);
+ return __skb_clone(dst, src);
+}
+EXPORT_SYMBOL_GPL(skb_morph);
+
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
@@ -393,66 +484,7 @@
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
-#define C(x) n->x = skb->x
-
- n->next = n->prev = NULL;
- n->sk = NULL;
- C(tstamp);
- C(dev);
- C(transport_header);
- C(network_header);
- C(mac_header);
- C(dst);
- dst_clone(skb->dst);
- C(sp);
-#ifdef CONFIG_INET
- secpath_get(skb->sp);
-#endif
- memcpy(n->cb, skb->cb, sizeof(skb->cb));
- C(len);
- C(data_len);
- C(mac_len);
- C(csum);
- C(local_df);
- n->cloned = 1;
- n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
- n->nohdr = 0;
- C(pkt_type);
- C(ip_summed);
- skb_copy_queue_mapping(n, skb);
- C(priority);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
- C(ipvs_property);
-#endif
- C(protocol);
- n->destructor = NULL;
- C(mark);
- __nf_copy(n, skb);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- C(nf_trace);
-#endif
-#ifdef CONFIG_NET_SCHED
- C(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
- n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
- n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
- n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
- C(iif);
-#endif
-#endif
- skb_copy_secmark(n, skb);
- C(truesize);
- atomic_set(&n->users, 1);
- C(head);
- C(data);
- C(tail);
- C(end);
-
- atomic_inc(&(skb_shinfo(skb)->dataref));
- skb->cloned = 1;
-
- return n;
+ return __skb_clone(n, skb);
}
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
@@ -463,50 +495,15 @@
*/
unsigned long offset = new->data - old->data;
#endif
- new->sk = NULL;
- new->dev = old->dev;
- skb_copy_queue_mapping(new, old);
- new->priority = old->priority;
- new->protocol = old->protocol;
- new->dst = dst_clone(old->dst);
-#ifdef CONFIG_INET
- new->sp = secpath_get(old->sp);
-#endif
- new->csum_start = old->csum_start;
- new->csum_offset = old->csum_offset;
- new->ip_summed = old->ip_summed;
- new->transport_header = old->transport_header;
- new->network_header = old->network_header;
- new->mac_header = old->mac_header;
+
+ __copy_skb_header(new, old);
+
#ifndef NET_SKBUFF_DATA_USES_OFFSET
/* {transport,network,mac}_header are relative to skb->head */
new->transport_header += offset;
new->network_header += offset;
new->mac_header += offset;
#endif
- memcpy(new->cb, old->cb, sizeof(old->cb));
- new->local_df = old->local_df;
- new->fclone = SKB_FCLONE_UNAVAILABLE;
- new->pkt_type = old->pkt_type;
- new->tstamp = old->tstamp;
- new->destructor = NULL;
- new->mark = old->mark;
- __nf_copy(new, old);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- new->nf_trace = old->nf_trace;
-#endif
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
- new->ipvs_property = old->ipvs_property;
-#endif
-#ifdef CONFIG_NET_SCHED
-#ifdef CONFIG_NET_CLS_ACT
- new->tc_verd = old->tc_verd;
-#endif
- new->tc_index = old->tc_index;
-#endif
- skb_copy_secmark(new, old);
- atomic_set(&new->users, 1);
skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -685,7 +682,7 @@
skb->transport_header += off;
skb->network_header += off;
skb->mac_header += off;
- skb->csum_start += off;
+ skb->csum_start += nhead;
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 006a383..cac5354 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -767,10 +767,9 @@
return 0;
}
-static int dccp_v6_rcv(struct sk_buff **pskb)
+static int dccp_v6_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
- struct sk_buff *skb = *pskb;
struct sock *sk;
int min_cov;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index f7fba77..43fcd29 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -88,12 +88,12 @@
static unsigned int dnrmg_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- dnrmg_send_peer(*pskb);
+ dnrmg_send_peer(skb);
return NF_ACCEPT;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d..93fe396 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
- sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+ inet_fragment.o
obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 0000000..484cf51
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,174 @@
+/*
+ * inet fragments management
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Pavel Emelyanov <xemul@openvz.org>
+ * Started as consolidation of ipv4/ip_fragment.c,
+ * ipv6/reassembly. and ipv6 nf conntrack reassembly
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/inet_frag.h>
+
+static void inet_frag_secret_rebuild(unsigned long dummy)
+{
+ struct inet_frags *f = (struct inet_frags *)dummy;
+ unsigned long now = jiffies;
+ int i;
+
+ write_lock(&f->lock);
+ get_random_bytes(&f->rnd, sizeof(u32));
+ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_queue *q;
+ struct hlist_node *p, *n;
+
+ hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+ unsigned int hval = f->hashfn(q);
+
+ if (hval != i) {
+ hlist_del(&q->list);
+
+ /* Relink to new hash chain. */
+ hlist_add_head(&q->list, &f->hash[hval]);
+ }
+ }
+ }
+ write_unlock(&f->lock);
+
+ mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+}
+
+void inet_frags_init(struct inet_frags *f)
+{
+ int i;
+
+ for (i = 0; i < INETFRAGS_HASHSZ; i++)
+ INIT_HLIST_HEAD(&f->hash[i]);
+
+ INIT_LIST_HEAD(&f->lru_list);
+ rwlock_init(&f->lock);
+
+ f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ (jiffies ^ (jiffies >> 6)));
+
+ f->nqueues = 0;
+ atomic_set(&f->mem, 0);
+
+ init_timer(&f->secret_timer);
+ f->secret_timer.function = inet_frag_secret_rebuild;
+ f->secret_timer.data = (unsigned long)f;
+ f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+ add_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_init);
+
+void inet_frags_fini(struct inet_frags *f)
+{
+ del_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_fini);
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ write_lock(&f->lock);
+ hlist_del(&fq->list);
+ list_del(&fq->lru_list);
+ f->nqueues--;
+ write_unlock(&f->lock);
+}
+
+void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ if (del_timer(&fq->timer))
+ atomic_dec(&fq->refcnt);
+
+ if (!(fq->last_in & COMPLETE)) {
+ fq_unlink(fq, f);
+ atomic_dec(&fq->refcnt);
+ fq->last_in |= COMPLETE;
+ }
+}
+
+EXPORT_SYMBOL(inet_frag_kill);
+
+static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
+ int *work)
+{
+ if (work)
+ *work -= skb->truesize;
+
+ atomic_sub(skb->truesize, &f->mem);
+ if (f->skb_free)
+ f->skb_free(skb);
+ kfree_skb(skb);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
+ int *work)
+{
+ struct sk_buff *fp;
+
+ BUG_TRAP(q->last_in & COMPLETE);
+ BUG_TRAP(del_timer(&q->timer) == 0);
+
+ /* Release all fragment data. */
+ fp = q->fragments;
+ while (fp) {
+ struct sk_buff *xp = fp->next;
+
+ frag_kfree_skb(f, fp, work);
+ fp = xp;
+ }
+
+ if (work)
+ *work -= f->qsize;
+ atomic_sub(f->qsize, &f->mem);
+
+ f->destructor(q);
+
+}
+EXPORT_SYMBOL(inet_frag_destroy);
+
+int inet_frag_evictor(struct inet_frags *f)
+{
+ struct inet_frag_queue *q;
+ int work, evicted = 0;
+
+ work = atomic_read(&f->mem) - f->ctl->low_thresh;
+ while (work > 0) {
+ read_lock(&f->lock);
+ if (list_empty(&f->lru_list)) {
+ read_unlock(&f->lock);
+ break;
+ }
+
+ q = list_first_entry(&f->lru_list,
+ struct inet_frag_queue, lru_list);
+ atomic_inc(&q->refcnt);
+ read_unlock(&f->lock);
+
+ spin_lock(&q->lock);
+ if (!(q->last_in & COMPLETE))
+ inet_frag_kill(q, f);
+ spin_unlock(&q->lock);
+
+ if (atomic_dec_and_test(&q->refcnt))
+ inet_frag_destroy(q, f, &work);
+ evicted++;
+ }
+
+ return evicted;
+}
+EXPORT_SYMBOL(inet_frag_evictor);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index afbf938..877da3e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -40,7 +40,7 @@
#include <net/route.h>
#include <net/xfrm.h>
-static inline int ip_forward_finish(struct sk_buff *skb)
+static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fabb86d..443b3f8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -39,6 +39,7 @@
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/inet_frag.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
@@ -49,21 +50,8 @@
* as well. Or notify me, at least. --ANK
*/
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
int sysctl_ipfrag_max_dist __read_mostly = 64;
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
struct ipfrag_skb_cb
{
struct inet_skb_parm h;
@@ -74,153 +62,102 @@
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
+
u32 user;
__be32 saddr;
__be32 daddr;
__be16 id;
u8 protocol;
- u8 last_in;
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
-
- struct sk_buff *fragments; /* linked list of received fragments */
- int len; /* total length of original datagram */
- int meat;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* when will this queue expire? */
- ktime_t stamp;
int iif;
unsigned int rid;
struct inet_peer *peer;
};
-/* Hash table. */
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+ /*
+ * Fragment cache limits. We will commit 256K at one time. Should we
+ * cross that limit we will prune down to 192K. This should cope with
+ * even the most extreme cases without allowing an attacker to
+ * measurably harm machine performance.
+ */
+ .high_thresh = 256 * 1024,
+ .low_thresh = 192 * 1024,
-#define IPQ_HASHSZ 64
+ /*
+ * Important NOTE! Fragment queue must be destroyed before MSL expires.
+ * RFC791 is wrong proposing to prolongate timer each fragment arrival
+ * by TTL.
+ */
+ .timeout = IP_FRAG_TIME,
+ .secret_interval = 10 * 60 * HZ,
+};
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+static struct inet_frags ip4_frags;
-static __inline__ void __ipq_unlink(struct ipq *qp)
+int ip_frag_nqueues(void)
{
- hlist_del(&qp->list);
- list_del(&qp->lru_list);
- ip_frag_nqueues--;
+ return ip4_frags.nqueues;
}
-static __inline__ void ipq_unlink(struct ipq *ipq)
+int ip_frag_mem(void)
{
- write_lock(&ipfrag_lock);
- __ipq_unlink(ipq);
- write_unlock(&ipfrag_lock);
+ return atomic_read(&ip4_frags.mem);
}
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev);
+
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+ ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
{
- unsigned long now = jiffies;
- int i;
+ struct ipq *ipq;
- write_lock(&ipfrag_lock);
- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
- for (i = 0; i < IPQ_HASHSZ; i++) {
- struct ipq *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
- unsigned int hval = ipqhashfn(q->id, q->saddr,
- q->daddr, q->protocol);
-
- if (hval != i) {
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hlist_add_head(&q->list, &ipq_hash[hval]);
- }
- }
- }
- write_unlock(&ipfrag_lock);
-
- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+ ipq = container_of(q, struct ipq, q);
+ return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
}
-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
-
/* Memory Tracking Functions. */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip_frag_mem);
+ atomic_sub(skb->truesize, &ip4_frags.mem);
kfree_skb(skb);
}
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
{
- if (work)
- *work -= sizeof(struct ipq);
- atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+ struct ipq *qp;
+
+ qp = container_of(q, struct ipq, q);
+ if (qp->peer)
+ inet_putpeer(qp->peer);
kfree(qp);
}
static __inline__ struct ipq *frag_alloc_queue(void)
{
- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+ struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
if (!qp)
return NULL;
- atomic_add(sizeof(struct ipq), &ip_frag_mem);
+ atomic_add(sizeof(struct ipq), &ip4_frags.mem);
return qp;
}
/* Destruction primitives. */
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
{
- struct sk_buff *fp;
-
- BUG_TRAP(qp->last_in&COMPLETE);
- BUG_TRAP(del_timer(&qp->timer) == 0);
-
- if (qp->peer)
- inet_putpeer(qp->peer);
-
- /* Release all fragment data. */
- fp = qp->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
-
- /* Finally, release the queue descriptor itself. */
- frag_free_queue(qp, work);
-}
-
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
-{
- if (atomic_dec_and_test(&ipq->refcnt))
- ip_frag_destroy(ipq, work);
+ inet_frag_put(&ipq->q, &ip4_frags);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -228,14 +165,7 @@
*/
static void ipq_kill(struct ipq *ipq)
{
- if (del_timer(&ipq->timer))
- atomic_dec(&ipq->refcnt);
-
- if (!(ipq->last_in & COMPLETE)) {
- ipq_unlink(ipq);
- atomic_dec(&ipq->refcnt);
- ipq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&ipq->q, &ip4_frags);
}
/* Memory limiting on fragments. Evictor trashes the oldest
@@ -243,33 +173,11 @@
*/
static void ip_evictor(void)
{
- struct ipq *qp;
- struct list_head *tmp;
- int work;
+ int evicted;
- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
- if (work <= 0)
- return;
-
- while (work > 0) {
- read_lock(&ipfrag_lock);
- if (list_empty(&ipq_lru_list)) {
- read_unlock(&ipfrag_lock);
- return;
- }
- tmp = ipq_lru_list.next;
- qp = list_entry(tmp, struct ipq, lru_list);
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
-
- spin_lock(&qp->lock);
- if (!(qp->last_in&COMPLETE))
- ipq_kill(qp);
- spin_unlock(&qp->lock);
-
- ipq_put(qp, &work);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- }
+ evicted = inet_frag_evictor(&ip4_frags);
+ if (evicted)
+ IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
}
/*
@@ -279,9 +187,9 @@
{
struct ipq *qp = (struct ipq *) arg;
- spin_lock(&qp->lock);
+ spin_lock(&qp->q.lock);
- if (qp->last_in & COMPLETE)
+ if (qp->q.last_in & COMPLETE)
goto out;
ipq_kill(qp);
@@ -289,8 +197,8 @@
IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
- struct sk_buff *head = qp->fragments;
+ if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+ struct sk_buff *head = qp->q.fragments;
/* Send an ICMP "Fragment Reassembly Timeout" message. */
if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -298,8 +206,8 @@
}
}
out:
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
+ spin_unlock(&qp->q.lock);
+ ipq_put(qp);
}
/* Creation primitives. */
@@ -312,7 +220,7 @@
#endif
unsigned int hash;
- write_lock(&ipfrag_lock);
+ write_lock(&ip4_frags.lock);
hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
qp_in->protocol);
#ifdef CONFIG_SMP
@@ -320,31 +228,31 @@
* such entry could be created on other cpu, while we
* promoted read lock to write lock.
*/
- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+ hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == qp_in->id &&
qp->saddr == qp_in->saddr &&
qp->daddr == qp_in->daddr &&
qp->protocol == qp_in->protocol &&
qp->user == qp_in->user) {
- atomic_inc(&qp->refcnt);
- write_unlock(&ipfrag_lock);
- qp_in->last_in |= COMPLETE;
- ipq_put(qp_in, NULL);
+ atomic_inc(&qp->q.refcnt);
+ write_unlock(&ip4_frags.lock);
+ qp_in->q.last_in |= COMPLETE;
+ ipq_put(qp_in);
return qp;
}
}
#endif
qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
- atomic_inc(&qp->refcnt);
+ if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
+ atomic_inc(&qp->q.refcnt);
- atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &ipq_hash[hash]);
- INIT_LIST_HEAD(&qp->lru_list);
- list_add_tail(&qp->lru_list, &ipq_lru_list);
- ip_frag_nqueues++;
- write_unlock(&ipfrag_lock);
+ atomic_inc(&qp->q.refcnt);
+ hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
+ INIT_LIST_HEAD(&qp->q.lru_list);
+ list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+ ip4_frags.nqueues++;
+ write_unlock(&ip4_frags.lock);
return qp;
}
@@ -357,23 +265,18 @@
goto out_nomem;
qp->protocol = iph->protocol;
- qp->last_in = 0;
qp->id = iph->id;
qp->saddr = iph->saddr;
qp->daddr = iph->daddr;
qp->user = user;
- qp->len = 0;
- qp->meat = 0;
- qp->fragments = NULL;
- qp->iif = 0;
qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
/* Initialize a timer for this entry. */
- init_timer(&qp->timer);
- qp->timer.data = (unsigned long) qp; /* pointer to queue */
- qp->timer.function = ip_expire; /* expire function */
- spin_lock_init(&qp->lock);
- atomic_set(&qp->refcnt, 1);
+ init_timer(&qp->q.timer);
+ qp->q.timer.data = (unsigned long) qp; /* pointer to queue */
+ qp->q.timer.function = ip_expire; /* expire function */
+ spin_lock_init(&qp->q.lock);
+ atomic_set(&qp->q.refcnt, 1);
return ip_frag_intern(qp);
@@ -395,20 +298,20 @@
struct ipq *qp;
struct hlist_node *n;
- read_lock(&ipfrag_lock);
+ read_lock(&ip4_frags.lock);
hash = ipqhashfn(id, saddr, daddr, protocol);
- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+ hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == id &&
qp->saddr == saddr &&
qp->daddr == daddr &&
qp->protocol == protocol &&
qp->user == user) {
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
+ atomic_inc(&qp->q.refcnt);
+ read_unlock(&ip4_frags.lock);
return qp;
}
}
- read_unlock(&ipfrag_lock);
+ read_unlock(&ip4_frags.lock);
return ip_frag_create(iph, user);
}
@@ -429,7 +332,7 @@
end = atomic_inc_return(&peer->rid);
qp->rid = end;
- rc = qp->fragments && (end - start) > max;
+ rc = qp->q.fragments && (end - start) > max;
if (rc) {
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -442,39 +345,42 @@
{
struct sk_buff *fp;
- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
- atomic_inc(&qp->refcnt);
+ if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+ atomic_inc(&qp->q.refcnt);
return -ETIMEDOUT;
}
- fp = qp->fragments;
+ fp = qp->q.fragments;
do {
struct sk_buff *xp = fp->next;
frag_kfree_skb(fp, NULL);
fp = xp;
} while (fp);
- qp->last_in = 0;
- qp->len = 0;
- qp->meat = 0;
- qp->fragments = NULL;
+ qp->q.last_in = 0;
+ qp->q.len = 0;
+ qp->q.meat = 0;
+ qp->q.fragments = NULL;
qp->iif = 0;
return 0;
}
/* Add new segment to existing queue. */
-static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct sk_buff *prev, *next;
+ struct net_device *dev;
int flags, offset;
int ihl, end;
+ int err = -ENOENT;
- if (qp->last_in & COMPLETE)
+ if (qp->q.last_in & COMPLETE)
goto err;
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
- unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+ unlikely(ip_frag_too_far(qp)) &&
+ unlikely(err = ip_frag_reinit(qp))) {
ipq_kill(qp);
goto err;
}
@@ -487,36 +393,40 @@
/* Determine the position of this fragment. */
end = offset + skb->len - ihl;
+ err = -EINVAL;
/* Is this the final fragment? */
if ((flags & IP_MF) == 0) {
/* If we already have some bits beyond end
* or have different end, the segment is corrrupted.
*/
- if (end < qp->len ||
- ((qp->last_in & LAST_IN) && end != qp->len))
+ if (end < qp->q.len ||
+ ((qp->q.last_in & LAST_IN) && end != qp->q.len))
goto err;
- qp->last_in |= LAST_IN;
- qp->len = end;
+ qp->q.last_in |= LAST_IN;
+ qp->q.len = end;
} else {
if (end&7) {
end &= ~7;
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
skb->ip_summed = CHECKSUM_NONE;
}
- if (end > qp->len) {
+ if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
- if (qp->last_in & LAST_IN)
+ if (qp->q.last_in & LAST_IN)
goto err;
- qp->len = end;
+ qp->q.len = end;
}
}
if (end == offset)
goto err;
+ err = -ENOMEM;
if (pskb_pull(skb, ihl) == NULL)
goto err;
- if (pskb_trim_rcsum(skb, end-offset))
+
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto err;
/* Find out which fragments are in front and at the back of us
@@ -524,7 +434,7 @@
* this fragment, right?
*/
prev = NULL;
- for (next = qp->fragments; next != NULL; next = next->next) {
+ for (next = qp->q.fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -539,8 +449,10 @@
if (i > 0) {
offset += i;
+ err = -EINVAL;
if (end <= offset)
goto err;
+ err = -ENOMEM;
if (!pskb_pull(skb, i))
goto err;
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -548,6 +460,8 @@
}
}
+ err = -ENOMEM;
+
while (next && FRAG_CB(next)->offset < end) {
int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
@@ -558,7 +472,7 @@
if (!pskb_pull(next, i))
goto err;
FRAG_CB(next)->offset += i;
- qp->meat -= i;
+ qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
@@ -573,9 +487,9 @@
if (prev)
prev->next = next;
else
- qp->fragments = next;
+ qp->q.fragments = next;
- qp->meat -= free_it->len;
+ qp->q.meat -= free_it->len;
frag_kfree_skb(free_it, NULL);
}
}
@@ -587,50 +501,77 @@
if (prev)
prev->next = skb;
else
- qp->fragments = skb;
+ qp->q.fragments = skb;
- if (skb->dev)
- qp->iif = skb->dev->ifindex;
- skb->dev = NULL;
- qp->stamp = skb->tstamp;
- qp->meat += skb->len;
- atomic_add(skb->truesize, &ip_frag_mem);
+ dev = skb->dev;
+ if (dev) {
+ qp->iif = dev->ifindex;
+ skb->dev = NULL;
+ }
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+ atomic_add(skb->truesize, &ip4_frags.mem);
if (offset == 0)
- qp->last_in |= FIRST_IN;
+ qp->q.last_in |= FIRST_IN;
- write_lock(&ipfrag_lock);
- list_move_tail(&qp->lru_list, &ipq_lru_list);
- write_unlock(&ipfrag_lock);
+ if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
+ return ip_frag_reasm(qp, prev, dev);
- return;
+ write_lock(&ip4_frags.lock);
+ list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+ write_unlock(&ip4_frags.lock);
+ return -EINPROGRESS;
err:
kfree_skb(skb);
+ return err;
}
/* Build a new IP datagram from all its fragments. */
-static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev)
{
struct iphdr *iph;
- struct sk_buff *fp, *head = qp->fragments;
+ struct sk_buff *fp, *head = qp->q.fragments;
int len;
int ihlen;
+ int err;
ipq_kill(qp);
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_nomem;
+
+ fp->next = head->next;
+ prev->next = fp;
+
+ skb_morph(head, qp->q.fragments);
+ head->next = qp->q.fragments->next;
+
+ kfree_skb(qp->q.fragments);
+ qp->q.fragments = head;
+ }
+
BUG_TRAP(head != NULL);
BUG_TRAP(FRAG_CB(head)->offset == 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
- len = ihlen + qp->len;
+ len = ihlen + qp->q.len;
+ err = -E2BIG;
if (len > 65535)
goto out_oversize;
/* Head of list must not be cloned. */
+ err = -ENOMEM;
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
goto out_nomem;
@@ -654,12 +595,12 @@
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip_frag_mem);
+ atomic_add(clone->truesize, &ip4_frags.mem);
}
skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip_frag_mem);
+ atomic_sub(head->truesize, &ip4_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -669,19 +610,19 @@
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip_frag_mem);
+ atomic_sub(fp->truesize, &ip4_frags.mem);
}
head->next = NULL;
head->dev = dev;
- head->tstamp = qp->stamp;
+ head->tstamp = qp->q.stamp;
iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
- qp->fragments = NULL;
- return head;
+ qp->q.fragments = NULL;
+ return 0;
out_nomem:
LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
@@ -694,54 +635,46 @@
NIPQUAD(qp->saddr));
out_fail:
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- return NULL;
+ return err;
}
/* Process an incoming IP datagram fragment. */
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct sk_buff *skb, u32 user)
{
struct ipq *qp;
- struct net_device *dev;
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
/* Start by cleaning up the memory. */
- if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+ if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
ip_evictor();
- dev = skb->dev;
-
/* Lookup (or create) queue header */
if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
- struct sk_buff *ret = NULL;
+ int ret;
- spin_lock(&qp->lock);
+ spin_lock(&qp->q.lock);
- ip_frag_queue(qp, skb);
+ ret = ip_frag_queue(qp, skb);
- if (qp->last_in == (FIRST_IN|LAST_IN) &&
- qp->meat == qp->len)
- ret = ip_frag_reasm(qp, dev);
-
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
+ spin_unlock(&qp->q.lock);
+ ipq_put(qp);
return ret;
}
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
- return NULL;
+ return -ENOMEM;
}
void __init ipfrag_init(void)
{
- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
- init_timer(&ipfrag_secret_timer);
- ipfrag_secret_timer.function = ipfrag_secret_rebuild;
- ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
- add_timer(&ipfrag_secret_timer);
+ ip4_frags.ctl = &ip4_frags_ctl;
+ ip4_frags.hashfn = ip4_hashfn;
+ ip4_frags.destructor = ip4_frag_free;
+ ip4_frags.skb_free = NULL;
+ ip4_frags.qsize = sizeof(struct ipq);
+ inet_frags_init(&ip4_frags);
}
EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 41d8964..168c871 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -172,8 +172,7 @@
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
- if (skb == NULL) {
+ if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
read_unlock(&ip_ra_lock);
return 1;
}
@@ -196,7 +195,7 @@
return 0;
}
-static inline int ip_local_deliver_finish(struct sk_buff *skb)
+static int ip_local_deliver_finish(struct sk_buff *skb)
{
__skb_pull(skb, ip_hdrlen(skb));
@@ -265,8 +264,7 @@
*/
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
- if (!skb)
+ if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
return 0;
}
@@ -326,7 +324,7 @@
return -1;
}
-static inline int ip_rcv_finish(struct sk_buff *skb)
+static int ip_rcv_finish(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 699f067..f508835 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -202,7 +202,7 @@
skb->dst->dev->mtu : dst_mtu(skb->dst);
}
-static inline int ip_finish_output(struct sk_buff *skb)
+static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 341474e..664cb8e 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -25,6 +25,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/netfilter.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
@@ -328,18 +329,18 @@
spin_unlock(&cp->lock);
}
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app)
{
int diff;
- const unsigned int tcp_offset = ip_hdrlen(*pskb);
+ const unsigned int tcp_offset = ip_hdrlen(skb);
struct tcphdr *th;
__u32 seq;
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -360,7 +361,7 @@
if (app->pkt_out == NULL)
return 1;
- if (!app->pkt_out(app, cp, pskb, &diff))
+ if (!app->pkt_out(app, cp, skb, &diff))
return 0;
/*
@@ -378,7 +379,7 @@
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom)
*/
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_app *app;
@@ -391,7 +392,7 @@
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_out(cp, pskb, app);
+ return app_tcp_pkt_out(cp, skb, app);
/*
* Call private output hook function
@@ -399,22 +400,22 @@
if (app->pkt_out == NULL)
return 1;
- return app->pkt_out(app, cp, pskb, NULL);
+ return app->pkt_out(app, cp, skb, NULL);
}
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app)
{
int diff;
- const unsigned int tcp_offset = ip_hdrlen(*pskb);
+ const unsigned int tcp_offset = ip_hdrlen(skb);
struct tcphdr *th;
__u32 seq;
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -435,7 +436,7 @@
if (app->pkt_in == NULL)
return 1;
- if (!app->pkt_in(app, cp, pskb, &diff))
+ if (!app->pkt_in(app, cp, skb, &diff))
return 0;
/*
@@ -453,7 +454,7 @@
* called by ipvs packet handler, assumes previously checked cp!=NULL.
* returns false if can't handle packet (oom).
*/
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_app *app;
@@ -466,7 +467,7 @@
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_in(cp, pskb, app);
+ return app_tcp_pkt_in(cp, skb, app);
/*
* Call private input hook function
@@ -474,7 +475,7 @@
if (app->pkt_in == NULL)
return 1;
- return app->pkt_in(app, cp, pskb, NULL);
+ return app->pkt_in(app, cp, skb, NULL);
}
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index fbca2a2..c6ed765 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -58,7 +58,6 @@
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
-EXPORT_SYMBOL(ip_vs_make_skb_writable);
/* ID used in ICMP lookups */
@@ -163,42 +162,6 @@
}
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
-{
- struct sk_buff *skb = *pskb;
-
- /* skb is already used, better copy skb and its payload */
- if (unlikely(skb_shared(skb) || skb->sk))
- goto copy_skb;
-
- /* skb data is already used, copy it */
- if (unlikely(skb_cloned(skb)))
- goto copy_data;
-
- return pskb_may_pull(skb, writable_len);
-
- copy_data:
- if (unlikely(writable_len > skb->len))
- return 0;
- return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-
- copy_skb:
- if (unlikely(writable_len > skb->len))
- return 0;
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb)
- return 0;
- BUG_ON(skb_is_nonlinear(skb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(skb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = skb;
- return 1;
-}
-
/*
* IPVS persistent scheduling function
* It creates a connection entry according to its template if exists,
@@ -525,12 +488,12 @@
* for VS/NAT.
*/
static unsigned int ip_vs_post_routing(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (!((*pskb)->ipvs_property))
+ if (!skb->ipvs_property)
return NF_ACCEPT;
/* The packet was sent from IPVS, exit this chain */
return NF_STOP;
@@ -541,13 +504,14 @@
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
}
-static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- skb = ip_defrag(skb, user);
- if (skb)
+ int err = ip_defrag(skb, user);
+
+ if (!err)
ip_send_check(ip_hdr(skb));
- return skb;
+
+ return err;
}
/*
@@ -605,9 +569,8 @@
* Currently handles error types - unreachable, quench, ttl exceeded.
* (Only used in VS/NAT)
*/
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -619,10 +582,8 @@
/* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- if (!skb)
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
- *pskb = skb;
}
iph = ip_hdr(skb);
@@ -690,9 +651,8 @@
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
- if (!ip_vs_make_skb_writable(pskb, offset))
+ if (!skb_make_writable(skb, offset))
goto out;
- skb = *pskb;
ip_vs_nat_icmp(skb, pp, cp, 1);
@@ -724,11 +684,10 @@
* rewrite addresses of the packet and send it on its way...
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -741,11 +700,10 @@
iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_out_icmp(pskb, &related);
+ int related, verdict = ip_vs_out_icmp(skb, &related);
if (related)
return verdict;
- skb = *pskb;
iph = ip_hdr(skb);
}
@@ -756,11 +714,9 @@
/* reassemble IP fragments */
if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- if (!skb)
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
iph = ip_hdr(skb);
- *pskb = skb;
}
ihl = iph->ihl << 2;
@@ -802,13 +758,12 @@
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
- if (!ip_vs_make_skb_writable(pskb, ihl))
+ if (!skb_make_writable(skb, ihl))
goto drop;
/* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
goto drop;
- skb = *pskb;
ip_hdr(skb)->saddr = cp->vaddr;
ip_send_check(ip_hdr(skb));
@@ -818,9 +773,8 @@
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
- skb = *pskb;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
@@ -835,7 +789,7 @@
drop:
ip_vs_conn_put(cp);
- kfree_skb(*pskb);
+ kfree_skb(skb);
return NF_STOLEN;
}
@@ -847,9 +801,8 @@
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
static int
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -861,12 +814,9 @@
/* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb,
- hooknum == NF_IP_LOCAL_IN ?
- IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
- if (!skb)
+ if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ?
+ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
return NF_STOLEN;
- *pskb = skb;
}
iph = ip_hdr(skb);
@@ -945,11 +895,10 @@
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -971,11 +920,10 @@
iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+ int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
- skb = *pskb;
iph = ip_hdr(skb);
}
@@ -1056,16 +1004,16 @@
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
int r;
- if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
- return ip_vs_in_icmp(pskb, &r, hooknum);
+ return ip_vs_in_icmp(skb, &r, hooknum);
}
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 344ddbb..59aa166 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -30,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/netfilter.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <asm/unaligned.h>
@@ -135,7 +136,7 @@
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
*/
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff **pskb, int *diff)
+ struct sk_buff *skb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
@@ -155,14 +156,14 @@
return 1;
/* Linear packets are much easier to deal with. */
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (cp->app_data == &ip_vs_ftp_pasv) {
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
- data_limit = skb_tail_pointer(*pskb);
+ data_limit = skb_tail_pointer(skb);
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
@@ -213,7 +214,7 @@
memcpy(start, buf, buf_len);
ret = 1;
} else {
- ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
+ ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
end-start, buf, buf_len);
}
@@ -238,7 +239,7 @@
* the client.
*/
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff **pskb, int *diff)
+ struct sk_buff *skb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
@@ -256,20 +257,20 @@
return 1;
/* Linear packets are much easier to deal with. */
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
/*
* Detecting whether it is passive
*/
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* Since there may be OPTIONS in the TCP packet and the HLEN is
the length of the header in 32-bit multiples, it is accurate
to calculate data address by th+HLEN*4 */
data = data_start = (char *)th + (th->doff << 2);
- data_limit = skb_tail_pointer(*pskb);
+ data_limit = skb_tail_pointer(skb);
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index e65577a..12dc0d6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -20,6 +20,7 @@
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h>
@@ -122,27 +123,27 @@
static int
-tcp_snat_handler(struct sk_buff **pskb,
+tcp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- const unsigned int tcphoff = ip_hdrlen(*pskb);
+ const unsigned int tcphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/* Call application helper if needed */
- if (!ip_vs_app_pkt_out(cp, pskb))
+ if (!ip_vs_app_pkt_out(cp, skb))
return 0;
}
- tcph = (void *)ip_hdr(*pskb) + tcphoff;
+ tcph = (void *)ip_hdr(skb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
@@ -150,17 +151,15 @@
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
tcph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- (*pskb)->len - tcphoff, 0);
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- (*pskb)->len - tcphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, tcph->check,
(char*)&(tcph->check) - (char*)tcph);
@@ -170,30 +169,30 @@
static int
-tcp_dnat_handler(struct sk_buff **pskb,
+tcp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- const unsigned int tcphoff = ip_hdrlen(*pskb);
+ const unsigned int tcphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff
*/
- if (!ip_vs_app_pkt_in(cp, pskb))
+ if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
- tcph = (void *)ip_hdr(*pskb) + tcphoff;
+ tcph = (void *)ip_hdr(skb) + tcphoff;
tcph->dest = cp->dport;
/*
@@ -203,18 +202,16 @@
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
tcph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- (*pskb)->len - tcphoff, 0);
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- (*pskb)->len - tcphoff,
- cp->protocol,
- (*pskb)->csum);
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ee5fe6..1fa7b33 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -18,6 +18,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/kernel.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/udp.h>
@@ -129,29 +130,29 @@
}
static int
-udp_snat_handler(struct sk_buff **pskb,
+udp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- const unsigned int udphoff = ip_hdrlen(*pskb);
+ const unsigned int udphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Call application helper if needed
*/
- if (!ip_vs_app_pkt_out(cp, pskb))
+ if (!ip_vs_app_pkt_out(cp, skb))
return 0;
}
- udph = (void *)ip_hdr(*pskb) + udphoff;
+ udph = (void *)ip_hdr(skb) + udphoff;
udph->source = cp->vport;
/*
@@ -161,17 +162,15 @@
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
udph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, udphoff,
- (*pskb)->len - udphoff, 0);
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- (*pskb)->len - udphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -183,30 +182,30 @@
static int
-udp_dnat_handler(struct sk_buff **pskb,
+udp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = ip_hdrlen(*pskb);
+ unsigned int udphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Attempt ip_vs_app call.
* It will fix ip_vs_conn
*/
- if (!ip_vs_app_pkt_in(cp, pskb))
+ if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
- udph = (void *)ip_hdr(*pskb) + udphoff;
+ udph = (void *)ip_hdr(skb) + udphoff;
udph->dest = cp->dport;
/*
@@ -216,20 +215,18 @@
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
udph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, udphoff,
- (*pskb)->len - udphoff, 0);
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- (*pskb)->len - udphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
}
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 666e080..d0a92de 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -253,7 +253,7 @@
}
/* copy-on-write the packet before mangling it */
- if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
@@ -264,7 +264,7 @@
skb->dst = &rt->u.dst;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
ip_hdr(skb)->daddr = cp->daddr;
ip_send_check(ip_hdr(skb));
@@ -529,7 +529,7 @@
}
/* copy-on-write the packet before mangling it */
- if (!ip_vs_make_skb_writable(&skb, offset))
+ if (!skb_make_writable(skb, offset))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b441929..5539deb 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -3,14 +3,15 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
+#include <linux/skbuff.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
{
- const struct iphdr *iph = ip_hdr(*pskb);
+ const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi fl = {};
struct dst_entry *odst;
@@ -29,14 +30,14 @@
if (type == RTN_LOCAL)
fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
- fl.mark = (*pskb)->mark;
+ fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ fl.mark = skb->mark;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
/* Drop old route. */
- dst_release((*pskb)->dst);
- (*pskb)->dst = &rt->u.dst;
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
@@ -44,8 +45,8 @@
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
- odst = (*pskb)->dst;
- if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+ odst = skb->dst;
+ if (ip_route_input(skb, iph->daddr, iph->saddr,
RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
dst_release(&rt->u.dst);
return -1;
@@ -54,70 +55,54 @@
dst_release(odst);
}
- if ((*pskb)->dst->error)
+ if (skb->dst->error)
return -1;
#ifdef CONFIG_XFRM
- if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
- if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+ if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ xfrm_decode_session(skb, &fl, AF_INET) == 0)
+ if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
return -1;
#endif
/* Change in oif may mean change in hh_len. */
- hh_len = (*pskb)->dst->dev->hard_header_len;
- if (skb_headroom(*pskb) < hh_len) {
- struct sk_buff *nskb;
-
- nskb = skb_realloc_headroom(*pskb, hh_len);
- if (!nskb)
- return -1;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
+ hh_len = skb->dst->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -1;
return 0;
}
EXPORT_SYMBOL(ip_route_me_harder);
#ifdef CONFIG_XFRM
-int ip_xfrm_me_harder(struct sk_buff **pskb)
+int ip_xfrm_me_harder(struct sk_buff *skb)
{
struct flowi fl;
unsigned int hh_len;
struct dst_entry *dst;
- if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+ if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
return 0;
- if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+ if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
return -1;
- dst = (*pskb)->dst;
+ dst = skb->dst;
if (dst->xfrm)
dst = ((struct xfrm_dst *)dst)->route;
dst_hold(dst);
- if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+ if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0)
return -1;
- dst_release((*pskb)->dst);
- (*pskb)->dst = dst;
+ dst_release(skb->dst);
+ skb->dst = dst;
/* Change in oif may mean change in hh_len. */
- hh_len = (*pskb)->dst->dev->hard_header_len;
- if (skb_headroom(*pskb) < hh_len) {
- struct sk_buff *nskb;
-
- nskb = skb_realloc_headroom(*pskb, hh_len);
- if (!nskb)
- return -1;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
+ hh_len = skb->dst->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -1;
return 0;
}
EXPORT_SYMBOL(ip_xfrm_me_harder);
@@ -150,17 +135,17 @@
}
}
-static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info)
{
const struct ip_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(*pskb);
+ const struct iphdr *iph = ip_hdr(skb);
if (!(iph->tos == rt_info->tos
&& iph->daddr == rt_info->daddr
&& iph->saddr == rt_info->saddr))
- return ip_route_me_harder(pskb, RTN_UNSPEC);
+ return ip_route_me_harder(skb, RTN_UNSPEC);
}
return 0;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 29114a9..2909c92 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -197,7 +197,7 @@
return 1;
}
-static unsigned int arpt_error(struct sk_buff **pskb,
+static unsigned int arpt_error(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -215,7 +215,7 @@
return (struct arpt_entry *)(base + offset);
}
-unsigned int arpt_do_table(struct sk_buff **pskb,
+unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -231,9 +231,9 @@
struct xt_table_info *private;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
- (2 * (*pskb)->dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+ (2 * skb->dev->addr_len) +
+ (2 * sizeof(u32)))))
return NF_DROP;
indev = in ? in->name : nulldevname;
@@ -245,14 +245,14 @@
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
- arp = arp_hdr(*pskb);
+ arp = arp_hdr(skb);
do {
- if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
+ if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
struct arpt_entry_target *t;
int hdr_len;
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
- (2 * (*pskb)->dev->addr_len);
+ (2 * skb->dev->addr_len);
ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -290,14 +290,14 @@
/* Targets which reenter must return
* abs. verdicts
*/
- verdict = t->u.kernel.target->target(pskb,
+ verdict = t->u.kernel.target->target(skb,
in, out,
hook,
t->u.kernel.target,
t->data);
/* Target might have changed stuff. */
- arp = arp_hdr(*pskb);
+ arp = arp_hdr(skb);
if (verdict == ARPT_CONTINUE)
e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index c4bdab4..45fa4e2 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -1,5 +1,6 @@
/* module that allows mangling of the arp payload */
#include <linux/module.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_arp/arpt_mangle.h>
#include <net/sock.h>
@@ -8,7 +9,7 @@
MODULE_DESCRIPTION("arptables arp payload mangle target");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
const void *targinfo)
@@ -18,47 +19,38 @@
unsigned char *arpptr;
int pln, hln;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, skb->len))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
-
- arp = arp_hdr(*pskb);
- arpptr = skb_network_header(*pskb) + sizeof(*arp);
+ arp = arp_hdr(skb);
+ arpptr = skb_network_header(skb) + sizeof(*arp);
pln = arp->ar_pln;
hln = arp->ar_hln;
/* We assume that pln and hln were checked in the match */
if (mangle->flags & ARPT_MANGLE_SDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > skb_tail_pointer(*pskb)))
+ (arpptr + hln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, mangle->src_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_SIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > skb_tail_pointer(*pskb)))
+ (arpptr + pln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_s.src_ip, pln);
}
arpptr += pln;
if (mangle->flags & ARPT_MANGLE_TDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > skb_tail_pointer(*pskb)))
+ (arpptr + hln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, mangle->tgt_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_TIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > skb_tail_pointer(*pskb)))
+ (arpptr + pln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
}
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 75c0230..302d3da 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -56,12 +56,12 @@
/* The work comes in here from netfilter.c */
static unsigned int arpt_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return arpt_do_table(pskb, hook, in, out, &packet_filter);
+ return arpt_do_table(skb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 23cbfc7..10a2ce0 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -335,6 +335,7 @@
ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
int diff;
+ int err;
struct iphdr *user_iph = (struct iphdr *)v->payload;
if (v->data_len < sizeof(*user_iph))
@@ -347,25 +348,18 @@
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
- printk(KERN_WARNING "ip_queue: OOM "
- "in mangle, dropping packet\n");
- return -ENOMEM;
+ err = pskb_expand_head(e->skb, 0,
+ diff - skb_tailroom(e->skb),
+ GFP_ATOMIC);
+ if (err) {
+ printk(KERN_WARNING "ip_queue: error "
+ "in mangle, dropping packet: %d\n", -err);
+ return err;
}
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(&e->skb, v->data_len))
+ if (!skb_make_writable(e->skb, v->data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6486894..4b10b98 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -169,7 +169,7 @@
}
static unsigned int
-ipt_error(struct sk_buff **pskb,
+ipt_error(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -312,7 +312,7 @@
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ipt_do_table(struct sk_buff **pskb,
+ipt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -331,8 +331,8 @@
struct xt_table_info *private;
/* Initialization */
- ip = ip_hdr(*pskb);
- datalen = (*pskb)->len - ip->ihl * 4;
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -359,7 +359,7 @@
struct ipt_entry_target *t;
if (IPT_MATCH_ITERATE(e, do_match,
- *pskb, in, out,
+ skb, in, out,
offset, &hotdrop) != 0)
goto no_match;
@@ -371,8 +371,8 @@
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* The packet is traced: log it */
- if (unlikely((*pskb)->nf_trace))
- trace_packet(*pskb, hook, in, out,
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
table->name, private, e);
#endif
/* Standard target? */
@@ -410,7 +410,7 @@
((struct ipt_entry *)table_base)->comefrom
= 0xeeeeeeec;
#endif
- verdict = t->u.kernel.target->target(pskb,
+ verdict = t->u.kernel.target->target(skb,
in, out,
hook,
t->u.kernel.target,
@@ -428,8 +428,8 @@
= 0x57acc001;
#endif
/* Target might have changed stuff. */
- ip = ip_hdr(*pskb);
- datalen = (*pskb)->len - ip->ihl * 4;
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 27f14e1..2f544da 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -289,7 +289,7 @@
***********************************************************************/
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -305,7 +305,7 @@
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL) {
printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
@@ -316,7 +316,7 @@
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
- if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP
&& (ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
return XT_CONTINUE;
@@ -325,7 +325,7 @@
* TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
* on, which all have an ID field [relevant for hashing]. */
- hash = clusterip_hashfn(*pskb, cipinfo->config);
+ hash = clusterip_hashfn(skb, cipinfo->config);
switch (ctinfo) {
case IP_CT_NEW:
@@ -355,7 +355,7 @@
/* despite being received via linklayer multicast, this is
* actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
- (*pskb)->pkt_type = PACKET_HOST;
+ skb->pkt_type = PACKET_HOST;
return XT_CONTINUE;
}
@@ -505,12 +505,12 @@
static unsigned int
arp_mangle(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct arphdr *arp = arp_hdr(*pskb);
+ struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index f1253bd..add1100 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -26,15 +26,15 @@
/* set ECT codepoint from IP header.
* return false if there was an error. */
static inline bool
-set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
{
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos;
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
return false;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
oldtos = iph->tos;
iph->tos &= ~IPT_ECN_IP_MASK;
iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -45,14 +45,13 @@
/* Return false if there was an error. */
static inline bool
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
{
struct tcphdr _tcph, *tcph;
__be16 oldval;
/* Not enought header? */
- tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
- sizeof(_tcph), &_tcph);
+ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (!tcph)
return false;
@@ -62,9 +61,9 @@
tcph->cwr == einfo->proto.tcp.cwr))
return true;
- if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+ if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return false;
- tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
+ tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
oldval = ((__be16 *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -72,13 +71,13 @@
if (einfo->operation & IPT_ECN_OP_SET_CWR)
tcph->cwr = einfo->proto.tcp.cwr;
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
oldval, ((__be16 *)tcph)[6], 0);
return true;
}
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -88,12 +87,12 @@
const struct ipt_ECN_info *einfo = targinfo;
if (einfo->operation & IPT_ECN_OP_SET_IP)
- if (!set_ect_ip(pskb, einfo))
+ if (!set_ect_ip(skb, einfo))
return NF_DROP;
if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
- && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
- if (!set_ect_tcp(pskb, einfo))
+ && ip_hdr(skb)->protocol == IPPROTO_TCP)
+ if (!set_ect_tcp(skb, einfo))
return NF_DROP;
return XT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 127a5e8..4b5e821 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -418,7 +418,7 @@
}
static unsigned int
-ipt_log_target(struct sk_buff **pskb,
+ipt_log_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -432,7 +432,7 @@
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+ ipt_log_packet(PF_INET, hooknum, skb, in, out, &li,
loginfo->prefix);
return XT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 3e0b562..44b516e 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -52,7 +52,7 @@
}
static unsigned int
-masquerade_target(struct sk_buff **pskb,
+masquerade_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -69,7 +69,7 @@
NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
nat = nfct_nat(ct);
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
@@ -82,7 +82,7 @@
return NF_ACCEPT;
mr = targinfo;
- rt = (struct rtable *)(*pskb)->dst;
+ rt = (struct rtable *)skb->dst;
newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
if (!newsrc) {
printk("MASQUERADE: %s ate my IP address\n", out->name);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 41a011d..f869929 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -43,7 +43,7 @@
}
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -59,14 +59,14 @@
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
- new_ip = ip_hdr(*pskb)->daddr & ~netmask;
+ new_ip = ip_hdr(skb)->daddr & ~netmask;
else
- new_ip = ip_hdr(*pskb)->saddr & ~netmask;
+ new_ip = ip_hdr(skb)->saddr & ~netmask;
new_ip |= mr->range[0].min_ip & netmask;
newrange = ((struct nf_nat_range)
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 6ac7a23..f7cf7d6 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -47,7 +47,7 @@
}
static unsigned int
-redirect_target(struct sk_buff **pskb,
+redirect_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -63,7 +63,7 @@
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
/* Local packets: make them go to loopback */
@@ -76,7 +76,7 @@
newdst = 0;
rcu_read_lock();
- indev = __in_dev_get_rcu((*pskb)->dev);
+ indev = __in_dev_get_rcu(skb->dev);
if (indev && (ifa = indev->ifa_list))
newdst = ifa->ifa_local;
rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index cb038c8..dcf4d21 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -131,7 +131,7 @@
)
addr_type = RTN_LOCAL;
- if (ip_route_me_harder(&nskb, addr_type))
+ if (ip_route_me_harder(nskb, addr_type))
goto free_nskb;
nskb->ip_summed = CHECKSUM_NONE;
@@ -162,7 +162,7 @@
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
-static unsigned int reject(struct sk_buff **pskb,
+static unsigned int reject(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -173,7 +173,7 @@
/* Our naive response construction doesn't deal with IP
options, and probably shouldn't try. */
- if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
+ if (ip_hdrlen(skb) != sizeof(struct iphdr))
return NF_DROP;
/* WARNING: This code causes reentry within iptables.
@@ -181,28 +181,28 @@
must return an absolute verdict. --RR */
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- send_unreach(*pskb, ICMP_NET_UNREACH);
+ send_unreach(skb, ICMP_NET_UNREACH);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- send_unreach(*pskb, ICMP_HOST_UNREACH);
+ send_unreach(skb, ICMP_HOST_UNREACH);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- send_unreach(*pskb, ICMP_PROT_UNREACH);
+ send_unreach(skb, ICMP_PROT_UNREACH);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- send_unreach(*pskb, ICMP_PORT_UNREACH);
+ send_unreach(skb, ICMP_PORT_UNREACH);
break;
case IPT_ICMP_NET_PROHIBITED:
- send_unreach(*pskb, ICMP_NET_ANO);
+ send_unreach(skb, ICMP_NET_ANO);
break;
case IPT_ICMP_HOST_PROHIBITED:
- send_unreach(*pskb, ICMP_HOST_ANO);
+ send_unreach(skb, ICMP_HOST_ANO);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
- send_unreach(*pskb, ICMP_PKT_FILTERED);
+ send_unreach(skb, ICMP_PKT_FILTERED);
break;
case IPT_TCP_RESET:
- send_reset(*pskb, hooknum);
+ send_reset(skb, hooknum);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 97641f1..8988571 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -104,7 +104,7 @@
}
static unsigned int
-same_target(struct sk_buff **pskb,
+same_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -121,7 +121,7 @@
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
hooknum == NF_IP_POST_ROUTING);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 25f5d0b..d4573ba 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -21,7 +21,7 @@
MODULE_DESCRIPTION("iptables TOS mangling module");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -29,13 +29,13 @@
const void *targinfo)
{
const struct ipt_tos_target_info *tosinfo = targinfo;
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
__u8 oldtos;
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
oldtos = iph->tos;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 2b54e7b..c620a05 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -20,7 +20,7 @@
MODULE_LICENSE("GPL");
static unsigned int
-ipt_ttl_target(struct sk_buff **pskb,
+ipt_ttl_target(struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
const void *targinfo)
@@ -29,10 +29,10 @@
const struct ipt_TTL_info *info = targinfo;
int new_ttl;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
switch (info->mode) {
case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index c636d6d..212b830 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -279,7 +279,7 @@
spin_unlock_bh(&ulog_lock);
}
-static unsigned int ipt_ulog_target(struct sk_buff **pskb,
+static unsigned int ipt_ulog_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -288,7 +288,7 @@
{
struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
- ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
+ ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL);
return XT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 4f51c1d..ba3262c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -62,31 +62,31 @@
/* The work comes in here from netfilter.c. */
static unsigned int
ipt_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_filter);
+ return ipt_do_table(skb, hook, in, out, &packet_filter);
}
static unsigned int
ipt_local_out_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("iptable_filter: ignoring short SOCK_RAW "
"packet.\n");
return NF_ACCEPT;
}
- return ipt_do_table(pskb, hook, in, out, &packet_filter);
+ return ipt_do_table(skb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 902446f..b4360a6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -75,17 +75,17 @@
/* The work comes in here from netfilter.c. */
static unsigned int
ipt_route_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_mangler);
+ return ipt_do_table(skb, hook, in, out, &packet_mangler);
}
static unsigned int
ipt_local_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -97,8 +97,8 @@
u_int32_t mark;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("iptable_mangle: ignoring short SOCK_RAW "
"packet.\n");
@@ -106,22 +106,22 @@
}
/* Save things which could affect route */
- mark = (*pskb)->mark;
- iph = ip_hdr(*pskb);
+ mark = skb->mark;
+ iph = ip_hdr(skb);
saddr = iph->saddr;
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
+ ret = ipt_do_table(skb, hook, in, out, &packet_mangler);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
if (iph->saddr != saddr ||
iph->daddr != daddr ||
- (*pskb)->mark != mark ||
+ skb->mark != mark ||
iph->tos != tos)
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
ret = NF_DROP;
}
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index d6e5033..5de6e57 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -47,30 +47,30 @@
/* The work comes in here from netfilter.c. */
static unsigned int
ipt_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_raw);
+ return ipt_do_table(skb, hook, in, out, &packet_raw);
}
static unsigned int
ipt_local_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("iptable_raw: ignoring short SOCK_RAW"
"packet.\n");
return NF_ACCEPT;
}
- return ipt_do_table(pskb, hook, in, out, &packet_raw);
+ return ipt_do_table(skb, hook, in, out, &packet_raw);
}
/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2fcb924..831e9b2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,19 +63,20 @@
}
/* Returns new sk_buff, or NULL */
-static struct sk_buff *
-nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
+ int err;
+
skb_orphan(skb);
local_bh_disable();
- skb = ip_defrag(skb, user);
+ err = ip_defrag(skb, user);
local_bh_enable();
- if (skb)
+ if (!err)
ip_send_check(ip_hdr(skb));
- return skb;
+ return err;
}
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
@@ -99,17 +100,17 @@
}
static unsigned int ipv4_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(pskb);
+ return nf_conntrack_confirm(skb);
}
static unsigned int ipv4_conntrack_help(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -120,7 +121,7 @@
struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
return NF_ACCEPT;
@@ -131,56 +132,55 @@
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
- return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
- if ((*pskb)->nfct)
+ if (skb->nfct)
return NF_ACCEPT;
/* Gather fragments. */
- if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- *pskb = nf_ct_ipv4_gather_frags(*pskb,
- hooknum == NF_IP_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT);
- if (!*pskb)
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ if (nf_ct_ipv4_gather_frags(skb,
+ hooknum == NF_IP_PRE_ROUTING ?
+ IP_DEFRAG_CONNTRACK_IN :
+ IP_DEFRAG_CONNTRACK_OUT))
return NF_STOLEN;
}
return NF_ACCEPT;
}
static unsigned int ipv4_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(PF_INET, hooknum, pskb);
+ return nf_conntrack_in(PF_INET, hooknum, skb);
}
static unsigned int ipv4_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET, hooknum, pskb);
+ return nf_conntrack_in(PF_INET, hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index bd93a1d..35a5aa6 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -24,7 +24,7 @@
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_amanda");
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -53,7 +53,7 @@
return NF_DROP;
sprintf(buffer, "%u", port);
- ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+ ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
matchoff, matchlen,
buffer, strlen(buffer));
if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 7221aa2..56e93f6 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -349,7 +349,7 @@
/* Returns true if succeeded. */
static int
manip_pkt(u_int16_t proto,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
@@ -357,19 +357,19 @@
struct iphdr *iph;
struct nf_nat_protocol *p;
- if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
return 0;
- iph = (void *)(*pskb)->data + iphdroff;
+ iph = (void *)skb->data + iphdroff;
/* Manipulate protcol part. */
/* rcu_read_lock()ed by nf_hook_slow */
p = __nf_nat_proto_find(proto);
- if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
+ if (!p->manip_pkt(skb, iphdroff, target, maniptype))
return 0;
- iph = (void *)(*pskb)->data + iphdroff;
+ iph = (void *)skb->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
@@ -385,7 +385,7 @@
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
- struct sk_buff **pskb)
+ struct sk_buff *skb)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
@@ -407,7 +407,7 @@
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+ if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
return NF_DROP;
}
return NF_ACCEPT;
@@ -418,7 +418,7 @@
int nf_nat_icmp_reply_translation(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
- struct sk_buff **pskb)
+ struct sk_buff *skb)
{
struct {
struct icmphdr icmp;
@@ -426,24 +426,24 @@
} *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
- int hdrlen = ip_hdrlen(*pskb);
+ int hdrlen = ip_hdrlen(skb);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
- if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
- inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+ inside = (void *)skb->data + ip_hdrlen(skb);
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
- if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
return 0;
/* Must be RELATED */
- NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
- (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
+ skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
/* Redirects on non-null nats must be dropped, else they'll
start talking to each other without our translation, and be
@@ -458,15 +458,15 @@
}
pr_debug("icmp_reply_translation: translating error %p manip %u "
- "dir %s\n", *pskb, manip,
+ "dir %s\n", skb, manip,
dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
- if (!nf_ct_get_tuple(*pskb,
- ip_hdrlen(*pskb) + sizeof(struct icmphdr),
- (ip_hdrlen(*pskb) +
+ if (!nf_ct_get_tuple(skb,
+ ip_hdrlen(skb) + sizeof(struct icmphdr),
+ (ip_hdrlen(skb) +
sizeof(struct icmphdr) + inside->ip.ihl * 4),
(u_int16_t)AF_INET,
inside->ip.protocol,
@@ -478,19 +478,19 @@
pass all hooks (locally-generated ICMP). Consider incoming
packet: PREROUTING (DST manip), routing produces ICMP, goes
through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, pskb,
- ip_hdrlen(*pskb) + sizeof(inside->icmp),
+ if (!manip_pkt(inside->ip.protocol, skb,
+ ip_hdrlen(skb) + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple,
!manip))
return 0;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
/* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+ inside = (void *)skb->data + ip_hdrlen(skb);
inside->icmp.checksum = 0;
inside->icmp.checksum =
- csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen, 0));
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
}
/* Change outer to look the reply to an incoming packet
@@ -506,7 +506,7 @@
if (ct->status & statusbit) {
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, pskb, 0, &target, manip))
+ if (!manip_pkt(0, skb, 0, &target, manip))
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 3663bd8..e1a16d3 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -28,7 +28,7 @@
/* FIXME: Time out? --RR */
static int
-mangle_rfc959_packet(struct sk_buff **pskb,
+mangle_rfc959_packet(struct sk_buff *skb,
__be32 newip,
u_int16_t port,
unsigned int matchoff,
@@ -43,13 +43,13 @@
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
/* |1|132.235.1.2|6275| */
static int
-mangle_eprt_packet(struct sk_buff **pskb,
+mangle_eprt_packet(struct sk_buff *skb,
__be32 newip,
u_int16_t port,
unsigned int matchoff,
@@ -63,13 +63,13 @@
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
/* |1|132.235.1.2|6275| */
static int
-mangle_epsv_packet(struct sk_buff **pskb,
+mangle_epsv_packet(struct sk_buff *skb,
__be32 newip,
u_int16_t port,
unsigned int matchoff,
@@ -83,11 +83,11 @@
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
+static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
unsigned int, unsigned int, struct nf_conn *,
enum ip_conntrack_info)
= {
@@ -99,7 +99,7 @@
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
-static unsigned int nf_nat_ftp(struct sk_buff **pskb,
+static unsigned int nf_nat_ftp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
unsigned int matchoff,
@@ -132,7 +132,7 @@
if (port == 0)
return NF_DROP;
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
+ if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) {
nf_ct_unexpect_related(exp);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c1b059a..a868c8c 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -22,12 +22,12 @@
#include <linux/netfilter/nf_conntrack_h323.h>
/****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
+static int set_addr(struct sk_buff *skb,
unsigned char **data, int dataoff,
unsigned int addroff, __be32 ip, __be16 port)
{
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
struct {
__be32 ip;
__be16 port;
@@ -38,8 +38,8 @@
buf.port = port;
addroff += dataoff;
- if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
- if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
if (net_ratelimit())
@@ -49,14 +49,13 @@
}
/* Relocate data pointer */
- th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+ th = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_tcph), &_tcph);
if (th == NULL)
return -1;
- *data = (*pskb)->data + ip_hdrlen(*pskb) +
- th->doff * 4 + dataoff;
+ *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
} else {
- if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
if (net_ratelimit())
@@ -67,36 +66,35 @@
/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
* or pull everything in a linear buffer, so we can safely
* use the skb pointers now */
- *data = ((*pskb)->data + ip_hdrlen(*pskb) +
- sizeof(struct udphdr));
+ *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
}
return 0;
}
/****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
+static int set_h225_addr(struct sk_buff *skb,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
{
- return set_addr(pskb, data, dataoff, taddr->ipAddress.ip,
+ return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
addr->ip, port);
}
/****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
+static int set_h245_addr(struct sk_buff *skb,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
{
- return set_addr(pskb, data, dataoff,
+ return set_addr(skb, data, dataoff,
taddr->unicastAddress.iPAddress.network,
addr->ip, port);
}
/****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count)
@@ -125,7 +123,7 @@
NIPQUAD(addr.ip), port,
NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &taddr[i],
+ return set_h225_addr(skb, data, 0, &taddr[i],
&ct->tuplehash[!dir].
tuple.dst.u3,
info->sig_port[!dir]);
@@ -137,7 +135,7 @@
NIPQUAD(addr.ip), port,
NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &taddr[i],
+ return set_h225_addr(skb, data, 0, &taddr[i],
&ct->tuplehash[!dir].
tuple.src.u3,
info->sig_port[!dir]);
@@ -149,7 +147,7 @@
}
/****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count)
@@ -168,7 +166,7 @@
NIPQUAD(addr.ip), ntohs(port),
NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
- return set_h225_addr(pskb, data, 0, &taddr[i],
+ return set_h225_addr(skb, data, 0, &taddr[i],
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.
dst.u.udp.port);
@@ -179,7 +177,7 @@
}
/****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
@@ -244,7 +242,7 @@
}
/* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, taddr,
+ if (set_h245_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
nated_port)) == 0) {
@@ -273,7 +271,7 @@
}
/****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
@@ -301,7 +299,7 @@
}
/* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, taddr,
+ if (set_h245_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) < 0) {
nf_ct_unexpect_related(exp);
@@ -318,7 +316,7 @@
}
/****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
@@ -351,7 +349,7 @@
}
/* Modify signal */
- if (set_h225_addr(pskb, data, dataoff, taddr,
+ if (set_h225_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -406,7 +404,7 @@
}
/****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, TransportAddress *taddr, int idx,
__be16 port, struct nf_conntrack_expect *exp)
@@ -439,7 +437,7 @@
}
/* Modify signal */
- if (set_h225_addr(pskb, data, 0, &taddr[idx],
+ if (set_h225_addr(skb, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -450,7 +448,7 @@
if (idx > 0 &&
get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
(ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(pskb, data, 0, &taddr[0],
+ set_h225_addr(skb, data, 0, &taddr[0],
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
}
@@ -495,7 +493,7 @@
}
/****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
@@ -525,7 +523,7 @@
}
/* Modify signal */
- if (!set_h225_addr(pskb, data, dataoff, taddr,
+ if (!set_h225_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 93d8a0a..8718da0 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -111,22 +111,14 @@
}
/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
{
- struct sk_buff *nskb;
-
- if ((*pskb)->len + extra > 65535)
+ if (skb->len + extra > 65535)
return 0;
- nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
- if (!nskb)
+ if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
return 0;
- /* Transfer socket to new skb. */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
return 1;
}
@@ -139,7 +131,7 @@
*
* */
int
-nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
+nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
@@ -147,37 +139,37 @@
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct rtable *rt = (struct rtable *)skb->dst;
struct iphdr *iph;
struct tcphdr *tcph;
int oldlen, datalen;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (rep_len > match_len &&
- rep_len - match_len > skb_tailroom(*pskb) &&
- !enlarge_skb(pskb, rep_len - match_len))
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
return 0;
- SKB_LINEAR_ASSERT(*pskb);
+ SKB_LINEAR_ASSERT(skb);
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
tcph = (void *)iph + iph->ihl*4;
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+ oldlen = skb->len - iph->ihl*4;
+ mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
match_offset, match_len, rep_buffer, rep_len);
- datalen = (*pskb)->len - iph->ihl*4;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ datalen = skb->len - iph->ihl*4;
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt_flags & RTCF_LOCAL) &&
- (*pskb)->dev->features & NETIF_F_V4_CSUM) {
- (*pskb)->ip_summed = CHECKSUM_PARTIAL;
- (*pskb)->csum_start = skb_headroom(*pskb) +
- skb_network_offset(*pskb) +
- iph->ihl * 4;
- (*pskb)->csum_offset = offsetof(struct tcphdr, check);
+ skb->dev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ iph->ihl * 4;
+ skb->csum_offset = offsetof(struct tcphdr, check);
tcph->check = ~tcp_v4_check(datalen,
iph->saddr, iph->daddr, 0);
} else {
@@ -188,7 +180,7 @@
datalen, 0));
}
} else
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
htons(oldlen), htons(datalen), 1);
if (rep_len != match_len) {
@@ -197,7 +189,7 @@
(int)rep_len - (int)match_len,
ct, ctinfo);
/* Tell TCP window tracking about seq change */
- nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
+ nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
ct, CTINFO2DIR(ctinfo));
}
return 1;
@@ -215,7 +207,7 @@
* should be fairly easy to do.
*/
int
-nf_nat_mangle_udp_packet(struct sk_buff **pskb,
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
@@ -223,48 +215,48 @@
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct rtable *rt = (struct rtable *)skb->dst;
struct iphdr *iph;
struct udphdr *udph;
int datalen, oldlen;
/* UDP helpers might accidentally mangle the wrong packet */
- iph = ip_hdr(*pskb);
- if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
+ iph = ip_hdr(skb);
+ if (skb->len < iph->ihl*4 + sizeof(*udph) +
match_offset + match_len)
return 0;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (rep_len > match_len &&
- rep_len - match_len > skb_tailroom(*pskb) &&
- !enlarge_skb(pskb, rep_len - match_len))
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
return 0;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
udph = (void *)iph + iph->ihl*4;
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+ oldlen = skb->len - iph->ihl*4;
+ mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
/* update the length of the UDP packet */
- datalen = (*pskb)->len - iph->ihl*4;
+ datalen = skb->len - iph->ihl*4;
udph->len = htons(datalen);
/* fix udp checksum if udp checksum was previously calculated */
- if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
return 1;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt_flags & RTCF_LOCAL) &&
- (*pskb)->dev->features & NETIF_F_V4_CSUM) {
- (*pskb)->ip_summed = CHECKSUM_PARTIAL;
- (*pskb)->csum_start = skb_headroom(*pskb) +
- skb_network_offset(*pskb) +
- iph->ihl * 4;
- (*pskb)->csum_offset = offsetof(struct udphdr, check);
+ skb->dev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ iph->ihl * 4;
+ skb->csum_offset = offsetof(struct udphdr, check);
udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
datalen, IPPROTO_UDP,
0);
@@ -278,7 +270,7 @@
udph->check = CSUM_MANGLED_0;
}
} else
- nf_proto_csum_replace2(&udph->check, *pskb,
+ nf_proto_csum_replace2(&udph->check, skb,
htons(oldlen), htons(datalen), 1);
return 1;
@@ -330,7 +322,7 @@
/* TCP SACK sequence number adjustment */
static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff **pskb,
+nf_nat_sack_adjust(struct sk_buff *skb,
struct tcphdr *tcph,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -338,17 +330,17 @@
unsigned int dir, optoff, optend;
struct nf_conn_nat *nat = nfct_nat(ct);
- optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
- optend = ip_hdrlen(*pskb) + tcph->doff * 4;
+ optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
+ optend = ip_hdrlen(skb) + tcph->doff * 4;
- if (!skb_make_writable(pskb, optend))
+ if (!skb_make_writable(skb, optend))
return 0;
dir = CTINFO2DIR(ctinfo);
while (optoff < optend) {
/* Usually: option, length. */
- unsigned char *op = (*pskb)->data + optoff;
+ unsigned char *op = skb->data + optoff;
switch (op[0]) {
case TCPOPT_EOL:
@@ -365,7 +357,7 @@
if (op[0] == TCPOPT_SACK &&
op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(*pskb, tcph, optoff+2,
+ sack_adjust(skb, tcph, optoff+2,
optoff+op[1], &nat->seq[!dir]);
optoff += op[1];
}
@@ -375,7 +367,7 @@
/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
int
-nf_nat_seq_adjust(struct sk_buff **pskb,
+nf_nat_seq_adjust(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
@@ -390,10 +382,10 @@
this_way = &nat->seq[dir];
other_way = &nat->seq[!dir];
- if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+ if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return 0;
- tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+ tcph = (void *)skb->data + ip_hdrlen(skb);
if (after(ntohl(tcph->seq), this_way->correction_pos))
newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
else
@@ -405,8 +397,8 @@
else
newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
+ nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+ nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
@@ -415,10 +407,10 @@
tcph->seq = newseq;
tcph->ack_seq = newack;
- if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+ if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
return 0;
- nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
+ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index bcf274b..766e2c1 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -27,7 +27,7 @@
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_irc");
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -58,7 +58,7 @@
pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
buffer, NIPQUAD(ip), port);
- ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+ ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
matchoff, matchlen, buffer,
strlen(buffer));
if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 984ec83..e1385a0 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -110,7 +110,7 @@
/* outbound packets == from PNS to PAC */
static int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
@@ -175,7 +175,7 @@
ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
/* mangle packet */
- if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
cid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_callid), (char *)&new_callid,
@@ -213,7 +213,7 @@
/* inbound packets == from PAC to PNS */
static int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
@@ -268,7 +268,7 @@
pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
- if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
pcid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_pcid), (char *)&new_pcid,
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d562290..b820f99 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -98,21 +98,21 @@
/* manipulate a GRE packet according to maniptype */
static int
-gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct gre_hdr *greh;
struct gre_hdr_pptp *pgreh;
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
unsigned int hdroff = iphdroff + iph->ihl * 4;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8))
+ if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
return 0;
- greh = (void *)(*pskb)->data + hdroff;
+ greh = (void *)skb->data + hdroff;
pgreh = (struct gre_hdr_pptp *)greh;
/* we only have destination manip of a packet, since 'source key'
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 898d737..b9fc724 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -52,20 +52,20 @@
}
static int
-icmp_manip_pkt(struct sk_buff **pskb,
+icmp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct icmphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return 0;
- hdr = (struct icmphdr *)((*pskb)->data + hdroff);
- nf_proto_csum_replace2(&hdr->checksum, *pskb,
+ hdr = (struct icmphdr *)(skb->data + hdroff);
+ nf_proto_csum_replace2(&hdr->checksum, skb,
hdr->un.echo.id, tuple->src.u.icmp.id, 0);
hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 5bbbb2a..6bab2e18 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -88,12 +88,12 @@
}
static int
-tcp_manip_pkt(struct sk_buff **pskb,
+tcp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct tcphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
@@ -103,14 +103,14 @@
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
- if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+ if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
- if (!skb_make_writable(pskb, hdroff + hdrsize))
+ if (!skb_make_writable(skb, hdroff + hdrsize))
return 0;
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct tcphdr *)(skb->data + hdroff);
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
@@ -132,8 +132,8 @@
if (hdrsize < sizeof(*hdr))
return 1;
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
+ nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index a0af4fd..cbf1a61 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -86,22 +86,22 @@
}
static int
-udp_manip_pkt(struct sk_buff **pskb,
+udp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct udphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
__be16 *portptr, newport;
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return 0;
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct udphdr *)((*pskb)->data + hdroff);
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct udphdr *)(skb->data + hdroff);
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
@@ -116,9 +116,9 @@
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
- if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport,
+ if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
0);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index f50d020..cfd2742 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -37,7 +37,7 @@
}
static int
-unknown_manip_pkt(struct sk_buff **pskb,
+unknown_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 76ec59a..46b25ab 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -65,7 +65,7 @@
};
/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
+static unsigned int ipt_snat_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -78,7 +78,7 @@
NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* Connection must be valid and new. */
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
@@ -107,7 +107,7 @@
ip_rt_put(rt);
}
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+static unsigned int ipt_dnat_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -121,14 +121,14 @@
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
hooknum == NF_IP_LOCAL_OUT);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* Connection must be valid and new. */
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
if (hooknum == NF_IP_LOCAL_OUT &&
mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
+ warn_if_extra_mangle(ip_hdr(skb)->daddr,
mr->range[0].min_ip);
return nf_nat_setup_info(ct, &mr->range[0], hooknum);
@@ -204,7 +204,7 @@
return nf_nat_setup_info(ct, &range, hooknum);
}
-int nf_nat_rule_find(struct sk_buff **pskb,
+int nf_nat_rule_find(struct sk_buff *skb,
unsigned int hooknum,
const struct net_device *in,
const struct net_device *out,
@@ -212,7 +212,7 @@
{
int ret;
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
+ ret = ipt_do_table(skb, hooknum, in, out, &nat_table);
if (ret == NF_ACCEPT) {
if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e14d419..ce9edbc 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -60,7 +60,7 @@
}
}
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
+static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo,
struct nf_conn *ct, const char **dptr, size_t dlen,
enum sip_header_pos pos, struct addr_map *map)
{
@@ -84,15 +84,15 @@
} else
return 1;
- if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
matchoff, matchlen, addr, addrlen))
return 0;
- *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
return 1;
}
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
+static unsigned int ip_nat_sip(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr)
@@ -101,8 +101,8 @@
struct addr_map map;
int dataoff, datalen;
- dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
- datalen = (*pskb)->len - dataoff;
+ dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+ datalen = skb->len - dataoff;
if (datalen < sizeof("SIP/2.0") - 1)
return NF_ACCEPT;
@@ -121,19 +121,19 @@
else
pos = POS_REQ_URI;
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
+ if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map))
return NF_DROP;
}
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
+ if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
+ !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
+ !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
+ !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
return NF_DROP;
return NF_ACCEPT;
}
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
+static unsigned int mangle_sip_packet(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr, size_t dlen,
@@ -145,16 +145,16 @@
if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
return 0;
- if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
matchoff, matchlen, buffer, bufflen))
return 0;
/* We need to reload this. Thanks Patrick. */
- *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
return 1;
}
-static int mangle_content_len(struct sk_buff **pskb,
+static int mangle_content_len(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char *dptr)
@@ -163,22 +163,22 @@
char buffer[sizeof("65536")];
int bufflen;
- dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
/* Get actual SDP lenght */
- if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+ if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
&matchlen, POS_SDP_HEADER) > 0) {
/* since ct_sip_get_info() give us a pointer passing 'v='
we need to add 2 bytes in this count. */
- int c_len = (*pskb)->len - dataoff - matchoff + 2;
+ int c_len = skb->len - dataoff - matchoff + 2;
/* Now, update SDP length */
- if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+ if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
&matchlen, POS_CONTENT) > 0) {
bufflen = sprintf(buffer, "%u", c_len);
- return nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ return nf_nat_mangle_udp_packet(skb, ct, ctinfo,
matchoff, matchlen,
buffer, bufflen);
}
@@ -186,7 +186,7 @@
return 0;
}
-static unsigned int mangle_sdp(struct sk_buff **pskb,
+static unsigned int mangle_sdp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
__be32 newip, u_int16_t port,
@@ -195,25 +195,25 @@
char buffer[sizeof("nnn.nnn.nnn.nnn")];
unsigned int dataoff, bufflen;
- dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
/* Mangle owner and contact info. */
bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+ if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
buffer, bufflen, POS_OWNER_IP4))
return 0;
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+ if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
buffer, bufflen, POS_CONNECTION_IP4))
return 0;
/* Mangle media port. */
bufflen = sprintf(buffer, "%u", port);
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+ if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
buffer, bufflen, POS_MEDIA))
return 0;
- return mangle_content_len(pskb, ctinfo, ct, dptr);
+ return mangle_content_len(skb, ctinfo, ct, dptr);
}
static void ip_nat_sdp_expect(struct nf_conn *ct,
@@ -241,7 +241,7 @@
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
+static unsigned int ip_nat_sdp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp,
const char *dptr)
@@ -277,7 +277,7 @@
if (port == 0)
return NF_DROP;
- if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
+ if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
nf_ct_unexpect_related(exp);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 6bfcd3a..03709d6 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1188,9 +1188,9 @@
*/
static int snmp_translate(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- struct sk_buff **pskb)
+ struct sk_buff *skb)
{
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
u_int16_t udplen = ntohs(udph->len);
u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1225,13 +1225,13 @@
/* We don't actually set up expectations, just adjust internal IP
* addresses if this is being NATted */
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
int dir = CTINFO2DIR(ctinfo);
unsigned int ret;
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
@@ -1250,7 +1250,7 @@
* enough room for a UDP header. Just verify the UDP length field so we
* can mess around with the payload.
*/
- if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
if (net_ratelimit())
printk(KERN_WARNING "SNMP: dropping malformed packet "
"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
@@ -1258,11 +1258,11 @@
return NF_DROP;
}
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, pskb);
+ ret = snmp_translate(ct, ctinfo, skb);
spin_unlock_bh(&snmp_lock);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 46cc99d..7db76ea 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -67,7 +67,7 @@
static unsigned int
nf_nat_fn(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -80,9 +80,9 @@
/* We never see fragments: conntrack defrags on pre-routing
and local-out, and nf_nat_out protects post-routing. */
- NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
have dropped it. Hence it's the user's responsibilty to
packet filter it out, or implement conntrack/NAT for that
@@ -91,10 +91,10 @@
/* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in
case we're doing NAT to the same network. */
- if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
struct icmphdr _hdr, *hp;
- hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp != NULL &&
hp->type == ICMP_REDIRECT)
@@ -119,9 +119,9 @@
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
- if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(ct, ctinfo,
- hooknum, pskb))
+ hooknum, skb))
return NF_DROP;
else
return NF_ACCEPT;
@@ -141,7 +141,7 @@
/* LOCAL_IN hook doesn't have a chain! */
ret = alloc_null_binding(ct, hooknum);
else
- ret = nf_nat_rule_find(pskb, hooknum, in, out,
+ ret = nf_nat_rule_find(skb, hooknum, in, out,
ct);
if (ret != NF_ACCEPT) {
@@ -159,31 +159,31 @@
ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
}
- return nf_nat_packet(ct, ctinfo, hooknum, pskb);
+ return nf_nat_packet(ct, ctinfo, hooknum, skb);
}
static unsigned int
nf_nat_in(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
- __be32 daddr = ip_hdr(*pskb)->daddr;
+ __be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = nf_nat_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != ip_hdr(*pskb)->daddr) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
+ daddr != ip_hdr(skb)->daddr) {
+ dst_release(skb->dst);
+ skb->dst = NULL;
}
return ret;
}
static unsigned int
nf_nat_out(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -195,14 +195,14 @@
unsigned int ret;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr))
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = nf_nat_fn(hooknum, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.src.u3.ip !=
@@ -210,7 +210,7 @@
|| ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all
)
- return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
+ return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
}
#endif
return ret;
@@ -218,7 +218,7 @@
static unsigned int
nf_nat_local_fn(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -228,24 +228,24 @@
unsigned int ret;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr))
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = nf_nat_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
ret = NF_DROP;
}
#ifdef CONFIG_XFRM
else if (ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all)
- if (ip_xfrm_me_harder(pskb))
+ if (ip_xfrm_me_harder(skb))
ret = NF_DROP;
#endif
}
@@ -254,7 +254,7 @@
static unsigned int
nf_nat_adjust(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -262,10 +262,10 @@
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
pr_debug("nf_nat_standalone: adjusting sequence number\n");
- if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
+ if (!nf_nat_seq_adjust(skb, ct, ctinfo))
return NF_DROP;
}
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 04dfeae..0ecec70 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -20,7 +20,7 @@
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_tftp");
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp)
{
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e5b05b0..fd16cb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -70,8 +70,8 @@
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
- seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
- atomic_read(&ip_frag_mem));
+ seq_printf(seq, "FRAG: inuse %d memory %d\n",
+ ip_frag_nqueues(), ip_frag_mem());
return 0;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index eb286ab..c98ef16 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
#include <net/route.h>
#include <net/tcp.h>
#include <net/cipso_ipv4.h>
+#include <net/inet_frag.h>
/* From af_inet.c */
extern int sysctl_ip_nonlocal_bind;
@@ -357,7 +358,7 @@
{
.ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
.procname = "ipfrag_high_thresh",
- .data = &sysctl_ipfrag_high_thresh,
+ .data = &ip4_frags_ctl.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -365,7 +366,7 @@
{
.ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
.procname = "ipfrag_low_thresh",
- .data = &sysctl_ipfrag_low_thresh,
+ .data = &ip4_frags_ctl.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -381,7 +382,7 @@
{
.ctl_name = NET_IPV4_IPFRAG_TIME,
.procname = "ipfrag_time",
- .data = &sysctl_ipfrag_time,
+ .data = &ip4_frags_ctl.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -732,7 +733,7 @@
{
.ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
.procname = "ipfrag_secret_interval",
- .data = &sysctl_ipfrag_secret_interval,
+ .data = &ip4_frags_ctl.secret_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0a42e93..0f00966 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1995,8 +1995,7 @@
}
/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk,
- int packets, u32 high_seq)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -2019,7 +2018,7 @@
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
cnt += tcp_skb_pcount(skb);
- if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+ if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
break;
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2040,9 +2039,9 @@
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, lost, tp->high_seq);
+ tcp_mark_head_lost(sk, lost);
} else {
- tcp_mark_head_lost(sk, 1, tp->high_seq);
+ tcp_mark_head_lost(sk, 1);
}
/* New heuristics: it is possible only after we switched
@@ -2381,7 +2380,7 @@
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
+ tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 434ef30..a4edd66 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -78,7 +78,7 @@
while (likely((err = xfrm4_output_one(skb)) == 0)) {
nf_reset(skb);
- err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+ err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
if (unlikely(err != 1))
break;
@@ -86,7 +86,7 @@
if (!skb->dst->xfrm)
return dst_output(skb);
- err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+ err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
skb->dst->dev, xfrm4_output_finish2);
if (unlikely(err != 1))
break;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index c82d4d4..1e89efd 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -102,7 +102,7 @@
struct tlvtype_proc {
int type;
- int (*func)(struct sk_buff **skbp, int offset);
+ int (*func)(struct sk_buff *skb, int offset);
};
/*********************
@@ -111,10 +111,8 @@
/* An unknown option is detected, decide what to do */
-static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
-
switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return 1;
@@ -139,9 +137,8 @@
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct tlvtype_proc *curr;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
@@ -172,13 +169,13 @@
/* type specific length/alignment
checks will be performed in the
func(). */
- if (curr->func(skbp, off) == 0)
+ if (curr->func(skb, off) == 0)
return 0;
break;
}
}
if (curr->type < 0) {
- if (ip6_tlvopt_unknown(skbp, off) == 0)
+ if (ip6_tlvopt_unknown(skb, off) == 0)
return 0;
}
break;
@@ -198,9 +195,8 @@
*****************************/
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -234,22 +230,13 @@
goto discard;
if (skb_cloned(skb)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
- struct inet6_skb_parm *opt2;
-
- if (skb2 == NULL)
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto discard;
- opt2 = IP6CB(skb2);
- memcpy(opt2, opt, sizeof(*opt2));
-
- kfree_skb(skb);
-
/* update all variable using below by copied skbuff */
- *skbp = skb = skb2;
- hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
optoff);
- ipv6h = ipv6_hdr(skb2);
+ ipv6h = ipv6_hdr(skb);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -280,9 +267,8 @@
{-1, NULL}
};
-static int ipv6_destopt_rcv(struct sk_buff **skbp)
+static int ipv6_destopt_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
__u16 dstbuf;
@@ -304,9 +290,8 @@
#endif
dst = dst_clone(skb->dst);
- if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
dst_release(dst);
- skb = *skbp;
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -337,10 +322,8 @@
NONE header. No data in packet.
********************************/
-static int ipv6_nodata_rcv(struct sk_buff **skbp)
+static int ipv6_nodata_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
-
kfree_skb(skb);
return 0;
}
@@ -360,9 +343,8 @@
Routing header.
********************************/
-static int ipv6_rthdr_rcv(struct sk_buff **skbp)
+static int ipv6_rthdr_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
struct in6_addr daddr;
@@ -464,18 +446,14 @@
Do not damage packets queued somewhere.
*/
if (skb_cloned(skb)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
/* the copy is a forwarded packet */
- if (skb2 == NULL) {
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return -1;
}
- kfree_skb(skb);
- *skbp = skb = skb2;
- opt = IP6CB(skb2);
- hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -578,9 +556,8 @@
/* Router Alert as of RFC 2711 */
-static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
const unsigned char *nh = skb_network_header(skb);
if (nh[optoff + 1] == 2) {
@@ -595,9 +572,8 @@
/* Jumbo payload */
-static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
const unsigned char *nh = skb_network_header(skb);
u32 pkt_len;
@@ -648,9 +624,8 @@
{ -1, }
};
-int ipv6_parse_hopopts(struct sk_buff **skbp)
+int ipv6_parse_hopopts(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
/*
@@ -667,8 +642,7 @@
}
opt->hop = sizeof(struct ipv6hdr);
- if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
- skb = *skbp;
+ if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 47b8ce2..9bb031f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -82,7 +82,7 @@
static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
#define icmpv6_socket __get_cpu_var(__icmpv6_socket)
-static int icmpv6_rcv(struct sk_buff **pskb);
+static int icmpv6_rcv(struct sk_buff *skb);
static struct inet6_protocol icmpv6_protocol = {
.handler = icmpv6_rcv,
@@ -614,9 +614,8 @@
* Handle icmp messages
*/
-static int icmpv6_rcv(struct sk_buff **pskb)
+static int icmpv6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(dev);
struct in6_addr *saddr, *daddr;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 25b9317..78de42a 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -146,7 +146,7 @@
__ip6_dst_store(sk, dst, daddr, saddr);
#ifdef CONFIG_XFRM
- if (dst) {
+ {
struct rt6_info *rt = (struct rt6_info *)dst;
rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9149fc2..fac6f7f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -125,7 +125,7 @@
}
if (hdr->nexthdr == NEXTHDR_HOP) {
- if (ipv6_parse_hopopts(&skb) < 0) {
+ if (ipv6_parse_hopopts(skb) < 0) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
rcu_read_unlock();
return 0;
@@ -149,7 +149,7 @@
*/
-static inline int ip6_input_finish(struct sk_buff *skb)
+static int ip6_input_finish(struct sk_buff *skb)
{
struct inet6_protocol *ipprot;
struct sock *raw_sk;
@@ -199,7 +199,7 @@
!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
- ret = ipprot->handler(&skb);
+ ret = ipprot->handler(skb);
if (ret > 0)
goto resubmit;
else if (ret == 0)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 011082e..13565df 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -70,7 +70,7 @@
spin_unlock_bh(&ip6_id_lock);
}
-static inline int ip6_output_finish(struct sk_buff *skb)
+static int ip6_output_finish(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 38b1496..b1326c2 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,15 +68,15 @@
}
}
-static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
{
struct ip6_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP6_LOCAL_OUT) {
- struct ipv6hdr *iph = ipv6_hdr(*pskb);
+ struct ipv6hdr *iph = ipv6_hdr(skb);
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
- return ip6_route_me_harder(*pskb);
+ return ip6_route_me_harder(skb);
}
return 0;
}
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 0473145..6413a30 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -332,6 +332,7 @@
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
int diff;
+ int err;
struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
if (v->data_len < sizeof(*user_iph))
@@ -344,25 +345,18 @@
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
+ err = pskb_expand_head(e->skb, 0,
+ diff - skb_tailroom(e->skb),
+ GFP_ATOMIC);
+ if (err) {
printk(KERN_WARNING "ip6_queue: OOM "
"in mangle, dropping packet\n");
- return -ENOMEM;
+ return err;
}
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(&e->skb, v->data_len))
+ if (!skb_make_writable(e->skb, v->data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cd9df02..acaba15 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -205,7 +205,7 @@
}
static unsigned int
-ip6t_error(struct sk_buff **pskb,
+ip6t_error(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -350,7 +350,7 @@
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ip6t_do_table(struct sk_buff **pskb,
+ip6t_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -389,17 +389,17 @@
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
+ if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
&protoff, &offset, &hotdrop)) {
struct ip6t_entry_target *t;
if (IP6T_MATCH_ITERATE(e, do_match,
- *pskb, in, out,
+ skb, in, out,
offset, protoff, &hotdrop) != 0)
goto no_match;
ADD_COUNTER(e->counters,
- ntohs(ipv6_hdr(*pskb)->payload_len)
+ ntohs(ipv6_hdr(skb)->payload_len)
+ IPV6_HDR_LEN,
1);
@@ -409,8 +409,8 @@
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* The packet is traced: log it */
- if (unlikely((*pskb)->nf_trace))
- trace_packet(*pskb, hook, in, out,
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
table->name, private, e);
#endif
/* Standard target? */
@@ -448,7 +448,7 @@
((struct ip6t_entry *)table_base)->comefrom
= 0xeeeeeeec;
#endif
- verdict = t->u.kernel.target->target(pskb,
+ verdict = t->u.kernel.target->target(skb,
in, out,
hook,
t->u.kernel.target,
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ad4d943..9afc836 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -18,7 +18,7 @@
MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
MODULE_LICENSE("GPL");
-static unsigned int ip6t_hl_target(struct sk_buff **pskb,
+static unsigned int ip6t_hl_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -29,10 +29,10 @@
const struct ip6t_HL_info *info = targinfo;
int new_hl;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
- ip6h = ipv6_hdr(*pskb);
+ ip6h = ipv6_hdr(skb);
switch (info->mode) {
case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 6ab9900..7a48c34 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -431,7 +431,7 @@
}
static unsigned int
-ip6t_log_target(struct sk_buff **pskb,
+ip6t_log_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -445,8 +445,7 @@
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
- loginfo->prefix);
+ ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix);
return XT_CONTINUE;
}
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 3fd08d5..1a7d291 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -172,7 +172,7 @@
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
}
-static unsigned int reject6_target(struct sk_buff **pskb,
+static unsigned int reject6_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -187,25 +187,25 @@
must return an absolute verdict. --RR */
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
- send_unreach(*pskb, ICMPV6_NOROUTE, hooknum);
+ send_unreach(skb, ICMPV6_NOROUTE, hooknum);
break;
case IP6T_ICMP6_ADM_PROHIBITED:
- send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum);
+ send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum);
break;
case IP6T_ICMP6_NOT_NEIGHBOUR:
- send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum);
+ send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
break;
case IP6T_ICMP6_ADDR_UNREACH:
- send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum);
+ send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum);
break;
case IP6T_ICMP6_PORT_UNREACH:
- send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum);
+ send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum);
break;
case IP6T_ICMP6_ECHOREPLY:
/* Do nothing */
break;
case IP6T_TCP_RESET:
- send_reset(*pskb);
+ send_reset(skb);
break;
default:
if (net_ratelimit())
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 7e32e2a..1d26b20 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -60,32 +60,32 @@
/* The work comes in here from netfilter.c. */
static unsigned int
ip6t_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+ return ip6t_do_table(skb, hook, in, out, &packet_filter);
}
static unsigned int
ip6t_local_out_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
#if 0
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
}
#endif
- return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+ return ip6t_do_table(skb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index f0a9efa..a0b6381 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -68,17 +68,17 @@
/* The work comes in here from netfilter.c. */
static unsigned int
ip6t_route_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+ return ip6t_do_table(skb, hook, in, out, &packet_mangler);
}
static unsigned int
ip6t_local_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -91,8 +91,8 @@
#if 0
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
@@ -100,22 +100,22 @@
#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
- memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
- memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
- mark = (*pskb)->mark;
- hop_limit = ipv6_hdr(*pskb)->hop_limit;
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
/* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
+ flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+ ret = ip6t_do_table(skb, hook, in, out, &packet_mangler);
if (ret != NF_DROP && ret != NF_STOLEN
- && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
- || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
- || (*pskb)->mark != mark
- || ipv6_hdr(*pskb)->hop_limit != hop_limit))
- return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
+ && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
+ || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr))
+ || skb->mark != mark
+ || ipv6_hdr(skb)->hop_limit != hop_limit))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
return ret;
}
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index ec290e4..8f7109f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -46,12 +46,12 @@
/* The work comes in here from netfilter.c. */
static unsigned int
ip6t_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_raw);
+ return ip6t_do_table(skb, hook, in, out, &packet_raw);
}
static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 37a3db9..0e40948 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -18,6 +18,7 @@
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ipv6.h>
+#include <net/inet_frag.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
@@ -145,7 +146,7 @@
}
static unsigned int ipv6_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -155,12 +156,12 @@
struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
unsigned int ret, protoff;
- unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
- unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+ unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
/* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
goto out;
@@ -172,23 +173,23 @@
if (!helper)
goto out;
- protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
- (*pskb)->len - extoff);
- if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) {
+ protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
+ skb->len - extoff);
+ if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
- ret = helper->help(pskb, protoff, ct, ctinfo);
+ ret = helper->help(skb, protoff, ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
out:
/* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(pskb);
+ return nf_conntrack_confirm(skb);
}
static unsigned int ipv6_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -196,17 +197,17 @@
struct sk_buff *reasm;
/* Previously seen (loopback)? */
- if ((*pskb)->nfct)
+ if (skb->nfct)
return NF_ACCEPT;
- reasm = nf_ct_frag6_gather(*pskb);
+ reasm = nf_ct_frag6_gather(skb);
/* queued */
if (reasm == NULL)
return NF_STOLEN;
/* error occured or not fragmented */
- if (reasm == *pskb)
+ if (reasm == skb)
return NF_ACCEPT;
nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
@@ -216,12 +217,12 @@
}
static unsigned int ipv6_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *reasm = (*pskb)->nfct_reasm;
+ struct sk_buff *reasm = skb->nfct_reasm;
/* This packet is fragmented and has reassembled packet. */
if (reasm) {
@@ -229,32 +230,32 @@
if (!reasm->nfct) {
unsigned int ret;
- ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
+ ret = nf_conntrack_in(PF_INET6, hooknum, reasm);
if (ret != NF_ACCEPT)
return ret;
}
nf_conntrack_get(reasm->nfct);
- (*pskb)->nfct = reasm->nfct;
- (*pskb)->nfctinfo = reasm->nfctinfo;
+ skb->nfct = reasm->nfct;
+ skb->nfctinfo = reasm->nfctinfo;
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET6, hooknum, pskb);
+ return nf_conntrack_in(PF_INET6, hooknum, skb);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+ if (skb->len < sizeof(struct ipv6hdr)) {
if (net_ratelimit())
printk("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
+ return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] = {
@@ -307,7 +308,7 @@
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT,
.procname = "nf_conntrack_frag6_timeout",
- .data = &nf_ct_frag6_timeout,
+ .data = &nf_frags_ctl.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -315,7 +316,7 @@
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &nf_ct_frag6_low_thresh,
+ .data = &nf_frags_ctl.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -323,7 +324,7 @@
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &nf_ct_frag6_high_thresh,
+ .data = &nf_frags_ctl.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 25442a8..726fafd 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -31,6 +31,7 @@
#include <net/sock.h>
#include <net/snmp.h>
+#include <net/inet_frag.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -48,10 +49,6 @@
#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
-unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
-unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
-unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
-
struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
@@ -63,51 +60,24 @@
struct nf_ct_frag6_queue
{
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
__be32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* expire timer */
- struct sk_buff *fragments;
- int len;
- int meat;
- ktime_t stamp;
unsigned int csum;
- __u8 last_in; /* has first/last segment arrived? */
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
__u16 nhoffset;
};
-/* Hash table. */
+struct inet_frags_ctl nf_frags_ctl __read_mostly = {
+ .high_thresh = 256 * 1024,
+ .low_thresh = 192 * 1024,
+ .timeout = IPV6_FRAG_TIMEOUT,
+ .secret_interval = 10 * 60 * HZ,
+};
-#define FRAG6Q_HASHSZ 64
-
-static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ];
-static DEFINE_RWLOCK(nf_ct_frag6_lock);
-static u32 nf_ct_frag6_hash_rnd;
-static LIST_HEAD(nf_ct_frag6_lru_list);
-int nf_ct_frag6_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
-{
- hlist_del(&fq->list);
- list_del(&fq->lru_list);
- nf_ct_frag6_nqueues--;
-}
-
-static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
-{
- write_lock(&nf_ct_frag6_lock);
- __fq_unlink(fq);
- write_unlock(&nf_ct_frag6_lock);
-}
+static struct inet_frags nf_frags;
static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
@@ -120,7 +90,7 @@
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
- c += nf_ct_frag6_hash_rnd;
+ c += nf_frags.rnd;
__jhash_mix(a, b, c);
a += (__force u32)saddr->s6_addr32[3];
@@ -133,100 +103,54 @@
c += (__force u32)id;
__jhash_mix(a, b, c);
- return c & (FRAG6Q_HASHSZ - 1);
+ return c & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list nf_ct_frag6_secret_timer;
-int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
-
-static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
- unsigned long now = jiffies;
- int i;
+ struct nf_ct_frag6_queue *nq;
- write_lock(&nf_ct_frag6_lock);
- get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
- for (i = 0; i < FRAG6Q_HASHSZ; i++) {
- struct nf_ct_frag6_queue *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) {
- unsigned int hval = ip6qhashfn(q->id,
- &q->saddr,
- &q->daddr);
- if (hval != i) {
- hlist_del(&q->list);
- /* Relink to new hash chain. */
- hlist_add_head(&q->list,
- &nf_ct_frag6_hash[hval]);
- }
- }
- }
- write_unlock(&nf_ct_frag6_lock);
-
- mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
+ nq = container_of(q, struct nf_ct_frag6_queue, q);
+ return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
}
-atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
+static void nf_skb_free(struct sk_buff *skb)
+{
+ if (NFCT_FRAG6_CB(skb)->orig)
+ kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &nf_ct_frag6_mem);
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-
+ atomic_sub(skb->truesize, &nf_frags.mem);
+ nf_skb_free(skb);
kfree_skb(skb);
}
-static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
- unsigned int *work)
+static void nf_frag_free(struct inet_frag_queue *q)
{
- if (work)
- *work -= sizeof(struct nf_ct_frag6_queue);
- atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
- kfree(fq);
+ kfree(container_of(q, struct nf_ct_frag6_queue, q));
}
static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
{
- struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+ struct nf_ct_frag6_queue *fq;
- if (!fq)
+ fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+ if (fq == NULL)
return NULL;
- atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
+ atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
return fq;
}
/* Destruction primitives. */
-/* Complete destruction of fq. */
-static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
- unsigned int *work)
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
{
- struct sk_buff *fp;
-
- BUG_TRAP(fq->last_in&COMPLETE);
- BUG_TRAP(del_timer(&fq->timer) == 0);
-
- /* Release all fragment data. */
- fp = fq->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
-
- frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
-{
- if (atomic_dec_and_test(&fq->refcnt))
- nf_ct_frag6_destroy(fq, work);
+ inet_frag_put(&fq->q, &nf_frags);
}
/* Kill fq entry. It is not destroyed immediately,
@@ -234,62 +158,28 @@
*/
static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
{
- if (del_timer(&fq->timer))
- atomic_dec(&fq->refcnt);
-
- if (!(fq->last_in & COMPLETE)) {
- fq_unlink(fq);
- atomic_dec(&fq->refcnt);
- fq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&fq->q, &nf_frags);
}
static void nf_ct_frag6_evictor(void)
{
- struct nf_ct_frag6_queue *fq;
- struct list_head *tmp;
- unsigned int work;
-
- work = atomic_read(&nf_ct_frag6_mem);
- if (work <= nf_ct_frag6_low_thresh)
- return;
-
- work -= nf_ct_frag6_low_thresh;
- while (work > 0) {
- read_lock(&nf_ct_frag6_lock);
- if (list_empty(&nf_ct_frag6_lru_list)) {
- read_unlock(&nf_ct_frag6_lock);
- return;
- }
- tmp = nf_ct_frag6_lru_list.next;
- BUG_ON(tmp == NULL);
- fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
- atomic_inc(&fq->refcnt);
- read_unlock(&nf_ct_frag6_lock);
-
- spin_lock(&fq->lock);
- if (!(fq->last_in&COMPLETE))
- fq_kill(fq);
- spin_unlock(&fq->lock);
-
- fq_put(fq, &work);
- }
+ inet_frag_evictor(&nf_frags);
}
static void nf_ct_frag6_expire(unsigned long data)
{
struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
- if (fq->last_in & COMPLETE)
+ if (fq->q.last_in & COMPLETE)
goto out;
fq_kill(fq);
out:
- spin_unlock(&fq->lock);
- fq_put(fq, NULL);
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
}
/* Creation primitives. */
@@ -302,31 +192,31 @@
struct hlist_node *n;
#endif
- write_lock(&nf_ct_frag6_lock);
+ write_lock(&nf_frags.lock);
#ifdef CONFIG_SMP
- hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+ hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
if (fq->id == fq_in->id &&
ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- write_unlock(&nf_ct_frag6_lock);
- fq_in->last_in |= COMPLETE;
- fq_put(fq_in, NULL);
+ atomic_inc(&fq->q.refcnt);
+ write_unlock(&nf_frags.lock);
+ fq_in->q.last_in |= COMPLETE;
+ fq_put(fq_in);
return fq;
}
}
#endif
fq = fq_in;
- if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
- atomic_inc(&fq->refcnt);
+ if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout))
+ atomic_inc(&fq->q.refcnt);
- atomic_inc(&fq->refcnt);
- hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]);
- INIT_LIST_HEAD(&fq->lru_list);
- list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
- nf_ct_frag6_nqueues++;
- write_unlock(&nf_ct_frag6_lock);
+ atomic_inc(&fq->q.refcnt);
+ hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
+ INIT_LIST_HEAD(&fq->q.lru_list);
+ list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
+ nf_frags.nqueues++;
+ write_unlock(&nf_frags.lock);
return fq;
}
@@ -341,15 +231,13 @@
goto oom;
}
- memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
-
fq->id = id;
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
- setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
- spin_lock_init(&fq->lock);
- atomic_set(&fq->refcnt, 1);
+ setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
+ spin_lock_init(&fq->q.lock);
+ atomic_set(&fq->q.refcnt, 1);
return nf_ct_frag6_intern(hash, fq);
@@ -364,17 +252,17 @@
struct hlist_node *n;
unsigned int hash = ip6qhashfn(id, src, dst);
- read_lock(&nf_ct_frag6_lock);
- hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+ read_lock(&nf_frags.lock);
+ hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
if (fq->id == id &&
ipv6_addr_equal(src, &fq->saddr) &&
ipv6_addr_equal(dst, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- read_unlock(&nf_ct_frag6_lock);
+ atomic_inc(&fq->q.refcnt);
+ read_unlock(&nf_frags.lock);
return fq;
}
}
- read_unlock(&nf_ct_frag6_lock);
+ read_unlock(&nf_frags.lock);
return nf_ct_frag6_create(hash, id, src, dst);
}
@@ -386,7 +274,7 @@
struct sk_buff *prev, *next;
int offset, end;
- if (fq->last_in & COMPLETE) {
+ if (fq->q.last_in & COMPLETE) {
pr_debug("Allready completed\n");
goto err;
}
@@ -412,13 +300,13 @@
/* If we already have some bits beyond end
* or have different end, the segment is corrupted.
*/
- if (end < fq->len ||
- ((fq->last_in & LAST_IN) && end != fq->len)) {
+ if (end < fq->q.len ||
+ ((fq->q.last_in & LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
- fq->last_in |= LAST_IN;
- fq->len = end;
+ fq->q.last_in |= LAST_IN;
+ fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
@@ -430,13 +318,13 @@
pr_debug("end of fragment not rounded to 8 bytes.\n");
return -1;
}
- if (end > fq->len) {
+ if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->last_in & LAST_IN) {
+ if (fq->q.last_in & LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
- fq->len = end;
+ fq->q.len = end;
}
}
@@ -458,7 +346,7 @@
* this fragment, right?
*/
prev = NULL;
- for (next = fq->fragments; next != NULL; next = next->next) {
+ for (next = fq->q.fragments; next != NULL; next = next->next) {
if (NFCT_FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -503,7 +391,7 @@
/* next fragment */
NFCT_FRAG6_CB(next)->offset += i;
- fq->meat -= i;
+ fq->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
@@ -518,9 +406,9 @@
if (prev)
prev->next = next;
else
- fq->fragments = next;
+ fq->q.fragments = next;
- fq->meat -= free_it->len;
+ fq->q.meat -= free_it->len;
frag_kfree_skb(free_it, NULL);
}
}
@@ -532,23 +420,23 @@
if (prev)
prev->next = skb;
else
- fq->fragments = skb;
+ fq->q.fragments = skb;
skb->dev = NULL;
- fq->stamp = skb->tstamp;
- fq->meat += skb->len;
- atomic_add(skb->truesize, &nf_ct_frag6_mem);
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+ atomic_add(skb->truesize, &nf_frags.mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->last_in |= FIRST_IN;
+ fq->q.last_in |= FIRST_IN;
}
- write_lock(&nf_ct_frag6_lock);
- list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
- write_unlock(&nf_ct_frag6_lock);
+ write_lock(&nf_frags.lock);
+ list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+ write_unlock(&nf_frags.lock);
return 0;
err:
@@ -567,7 +455,7 @@
static struct sk_buff *
nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
{
- struct sk_buff *fp, *op, *head = fq->fragments;
+ struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
fq_kill(fq);
@@ -577,7 +465,7 @@
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
- sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
pr_debug("payload len is too large.\n");
@@ -614,7 +502,7 @@
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &nf_ct_frag6_mem);
+ atomic_add(clone->truesize, &nf_frags.mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -628,7 +516,7 @@
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &nf_ct_frag6_mem);
+ atomic_sub(head->truesize, &nf_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -638,12 +526,12 @@
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &nf_ct_frag6_mem);
+ atomic_sub(fp->truesize, &nf_frags.mem);
}
head->next = NULL;
head->dev = dev;
- head->tstamp = fq->stamp;
+ head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
/* Yes, and fold redundant checksum back. 8) */
@@ -652,7 +540,7 @@
skb_network_header_len(head),
head->csum);
- fq->fragments = NULL;
+ fq->q.fragments = NULL;
/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
fp = skb_shinfo(head)->frag_list;
@@ -788,7 +676,7 @@
goto ret_orig;
}
- if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
+ if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
nf_ct_frag6_evictor();
fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -797,23 +685,23 @@
goto ret_orig;
}
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
- spin_unlock(&fq->lock);
+ spin_unlock(&fq->q.lock);
pr_debug("Can't insert skb to queue\n");
- fq_put(fq, NULL);
+ fq_put(fq);
goto ret_orig;
}
- if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
+ if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
pr_debug("Can't reassemble fragmented packets\n");
}
- spin_unlock(&fq->lock);
+ spin_unlock(&fq->q.lock);
- fq_put(fq, NULL);
+ fq_put(fq);
return ret_skb;
ret_orig:
@@ -859,20 +747,20 @@
int nf_ct_frag6_init(void)
{
- nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
- setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
- nf_ct_frag6_secret_timer.expires = jiffies
- + nf_ct_frag6_secret_interval;
- add_timer(&nf_ct_frag6_secret_timer);
+ nf_frags.ctl = &nf_frags_ctl;
+ nf_frags.hashfn = nf_hashfn;
+ nf_frags.destructor = nf_frag_free;
+ nf_frags.skb_free = nf_skb_free;
+ nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+ inet_frags_init(&nf_frags);
return 0;
}
void nf_ct_frag6_cleanup(void)
{
- del_timer(&nf_ct_frag6_secret_timer);
- nf_ct_frag6_low_thresh = 0;
+ inet_frags_fini(&nf_frags);
+
+ nf_frags_ctl.low_thresh = 0;
nf_ct_frag6_evictor();
}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index db94501..be526ad 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -54,7 +54,7 @@
seq_printf(seq, "RAW6: inuse %d\n",
fold_prot_inuse(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
+ ip6_frag_nqueues(), ip6_frag_mem());
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 31601c9..6ad19cf 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -42,6 +42,7 @@
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>
+#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -53,11 +54,7 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
-
-int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
-int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
-
-int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
+#include <net/inet_frag.h>
struct ip6frag_skb_cb
{
@@ -74,53 +71,39 @@
struct frag_queue
{
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
__be32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* expire timer */
- struct sk_buff *fragments;
- int len;
- int meat;
int iif;
- ktime_t stamp;
unsigned int csum;
- __u8 last_in; /* has first/last segment arrived? */
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
__u16 nhoffset;
};
-/* Hash table. */
+struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
+ .high_thresh = 256 * 1024,
+ .low_thresh = 192 * 1024,
+ .timeout = IPV6_FRAG_TIMEOUT,
+ .secret_interval = 10 * 60 * HZ,
+};
-#define IP6Q_HASHSZ 64
+static struct inet_frags ip6_frags;
-static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ];
-static DEFINE_RWLOCK(ip6_frag_lock);
-static u32 ip6_frag_hash_rnd;
-static LIST_HEAD(ip6_frag_lru_list);
-int ip6_frag_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct frag_queue *fq)
+int ip6_frag_nqueues(void)
{
- hlist_del(&fq->list);
- list_del(&fq->lru_list);
- ip6_frag_nqueues--;
+ return ip6_frags.nqueues;
}
-static __inline__ void fq_unlink(struct frag_queue *fq)
+int ip6_frag_mem(void)
{
- write_lock(&ip6_frag_lock);
- __fq_unlink(fq);
- write_unlock(&ip6_frag_lock);
+ return atomic_read(&ip6_frags.mem);
}
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev);
+
/*
* callers should be careful not to use the hash value outside the ipfrag_lock
* as doing so could race with ipfrag_hash_rnd being recalculated.
@@ -136,7 +119,7 @@
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
- c += ip6_frag_hash_rnd;
+ c += ip6_frags.rnd;
__jhash_mix(a, b, c);
a += (__force u32)saddr->s6_addr32[3];
@@ -149,60 +132,29 @@
c += (__force u32)id;
__jhash_mix(a, b, c);
- return c & (IP6Q_HASHSZ - 1);
+ return c & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ip6_frag_secret_rebuild(unsigned long dummy)
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
{
- unsigned long now = jiffies;
- int i;
+ struct frag_queue *fq;
- write_lock(&ip6_frag_lock);
- get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
- for (i = 0; i < IP6Q_HASHSZ; i++) {
- struct frag_queue *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) {
- unsigned int hval = ip6qhashfn(q->id,
- &q->saddr,
- &q->daddr);
-
- if (hval != i) {
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hlist_add_head(&q->list,
- &ip6_frag_hash[hval]);
-
- }
- }
- }
- write_unlock(&ip6_frag_lock);
-
- mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
+ fq = container_of(q, struct frag_queue, q);
+ return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
}
-atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-
/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip6_frag_mem);
+ atomic_sub(skb->truesize, &ip6_frags.mem);
kfree_skb(skb);
}
-static inline void frag_free_queue(struct frag_queue *fq, int *work)
+static void ip6_frag_free(struct inet_frag_queue *fq)
{
- if (work)
- *work -= sizeof(struct frag_queue);
- atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
- kfree(fq);
+ kfree(container_of(fq, struct frag_queue, q));
}
static inline struct frag_queue *frag_alloc_queue(void)
@@ -211,36 +163,15 @@
if(!fq)
return NULL;
- atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
+ atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
return fq;
}
/* Destruction primitives. */
-/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq, int *work)
+static __inline__ void fq_put(struct frag_queue *fq)
{
- struct sk_buff *fp;
-
- BUG_TRAP(fq->last_in&COMPLETE);
- BUG_TRAP(del_timer(&fq->timer) == 0);
-
- /* Release all fragment data. */
- fp = fq->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
-
- frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct frag_queue *fq, int *work)
-{
- if (atomic_dec_and_test(&fq->refcnt))
- ip6_frag_destroy(fq, work);
+ inet_frag_put(&fq->q, &ip6_frags);
}
/* Kill fq entry. It is not destroyed immediately,
@@ -248,45 +179,16 @@
*/
static __inline__ void fq_kill(struct frag_queue *fq)
{
- if (del_timer(&fq->timer))
- atomic_dec(&fq->refcnt);
-
- if (!(fq->last_in & COMPLETE)) {
- fq_unlink(fq);
- atomic_dec(&fq->refcnt);
- fq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&fq->q, &ip6_frags);
}
static void ip6_evictor(struct inet6_dev *idev)
{
- struct frag_queue *fq;
- struct list_head *tmp;
- int work;
+ int evicted;
- work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
- if (work <= 0)
- return;
-
- while(work > 0) {
- read_lock(&ip6_frag_lock);
- if (list_empty(&ip6_frag_lru_list)) {
- read_unlock(&ip6_frag_lock);
- return;
- }
- tmp = ip6_frag_lru_list.next;
- fq = list_entry(tmp, struct frag_queue, lru_list);
- atomic_inc(&fq->refcnt);
- read_unlock(&ip6_frag_lock);
-
- spin_lock(&fq->lock);
- if (!(fq->last_in&COMPLETE))
- fq_kill(fq);
- spin_unlock(&fq->lock);
-
- fq_put(fq, &work);
- IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
- }
+ evicted = inet_frag_evictor(&ip6_frags);
+ if (evicted)
+ IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
}
static void ip6_frag_expire(unsigned long data)
@@ -294,9 +196,9 @@
struct frag_queue *fq = (struct frag_queue *) data;
struct net_device *dev = NULL;
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
- if (fq->last_in & COMPLETE)
+ if (fq->q.last_in & COMPLETE)
goto out;
fq_kill(fq);
@@ -311,7 +213,7 @@
rcu_read_unlock();
/* Don't send error if the first segment did not arrive. */
- if (!(fq->last_in&FIRST_IN) || !fq->fragments)
+ if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments)
goto out;
/*
@@ -319,13 +221,13 @@
segment was received. And do not use fq->dev
pointer directly, device might already disappeared.
*/
- fq->fragments->dev = dev;
- icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+ fq->q.fragments->dev = dev;
+ icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
out:
if (dev)
dev_put(dev);
- spin_unlock(&fq->lock);
- fq_put(fq, NULL);
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
}
/* Creation primitives. */
@@ -339,32 +241,32 @@
struct hlist_node *n;
#endif
- write_lock(&ip6_frag_lock);
+ write_lock(&ip6_frags.lock);
hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
#ifdef CONFIG_SMP
- hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+ hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
if (fq->id == fq_in->id &&
ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- write_unlock(&ip6_frag_lock);
- fq_in->last_in |= COMPLETE;
- fq_put(fq_in, NULL);
+ atomic_inc(&fq->q.refcnt);
+ write_unlock(&ip6_frags.lock);
+ fq_in->q.last_in |= COMPLETE;
+ fq_put(fq_in);
return fq;
}
}
#endif
fq = fq_in;
- if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
- atomic_inc(&fq->refcnt);
+ if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
+ atomic_inc(&fq->q.refcnt);
- atomic_inc(&fq->refcnt);
- hlist_add_head(&fq->list, &ip6_frag_hash[hash]);
- INIT_LIST_HEAD(&fq->lru_list);
- list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
- ip6_frag_nqueues++;
- write_unlock(&ip6_frag_lock);
+ atomic_inc(&fq->q.refcnt);
+ hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
+ INIT_LIST_HEAD(&fq->q.lru_list);
+ list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+ ip6_frags.nqueues++;
+ write_unlock(&ip6_frags.lock);
return fq;
}
@@ -382,11 +284,11 @@
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
- init_timer(&fq->timer);
- fq->timer.function = ip6_frag_expire;
- fq->timer.data = (long) fq;
- spin_lock_init(&fq->lock);
- atomic_set(&fq->refcnt, 1);
+ init_timer(&fq->q.timer);
+ fq->q.timer.function = ip6_frag_expire;
+ fq->q.timer.data = (long) fq;
+ spin_lock_init(&fq->q.lock);
+ atomic_set(&fq->q.refcnt, 1);
return ip6_frag_intern(fq);
@@ -403,30 +305,31 @@
struct hlist_node *n;
unsigned int hash;
- read_lock(&ip6_frag_lock);
+ read_lock(&ip6_frags.lock);
hash = ip6qhashfn(id, src, dst);
- hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+ hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
if (fq->id == id &&
ipv6_addr_equal(src, &fq->saddr) &&
ipv6_addr_equal(dst, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- read_unlock(&ip6_frag_lock);
+ atomic_inc(&fq->q.refcnt);
+ read_unlock(&ip6_frags.lock);
return fq;
}
}
- read_unlock(&ip6_frag_lock);
+ read_unlock(&ip6_frags.lock);
return ip6_frag_create(id, src, dst, idev);
}
-static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
+ struct net_device *dev;
int offset, end;
- if (fq->last_in & COMPLETE)
+ if (fq->q.last_in & COMPLETE)
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -439,7 +342,7 @@
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((u8 *)&fhdr->frag_off -
skb_network_header(skb)));
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE) {
@@ -454,11 +357,11 @@
/* If we already have some bits beyond end
* or have different end, the segment is corrupted.
*/
- if (end < fq->len ||
- ((fq->last_in & LAST_IN) && end != fq->len))
+ if (end < fq->q.len ||
+ ((fq->q.last_in & LAST_IN) && end != fq->q.len))
goto err;
- fq->last_in |= LAST_IN;
- fq->len = end;
+ fq->q.last_in |= LAST_IN;
+ fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
@@ -471,13 +374,13 @@
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, payload_len));
- return;
+ return -1;
}
- if (end > fq->len) {
+ if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->last_in & LAST_IN)
+ if (fq->q.last_in & LAST_IN)
goto err;
- fq->len = end;
+ fq->q.len = end;
}
}
@@ -496,7 +399,7 @@
* this fragment, right?
*/
prev = NULL;
- for(next = fq->fragments; next != NULL; next = next->next) {
+ for(next = fq->q.fragments; next != NULL; next = next->next) {
if (FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -533,7 +436,7 @@
if (!pskb_pull(next, i))
goto err;
FRAG6_CB(next)->offset += i; /* next fragment */
- fq->meat -= i;
+ fq->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
@@ -548,9 +451,9 @@
if (prev)
prev->next = next;
else
- fq->fragments = next;
+ fq->q.fragments = next;
- fq->meat -= free_it->len;
+ fq->q.meat -= free_it->len;
frag_kfree_skb(free_it, NULL);
}
}
@@ -562,30 +465,37 @@
if (prev)
prev->next = skb;
else
- fq->fragments = skb;
+ fq->q.fragments = skb;
- if (skb->dev)
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- fq->stamp = skb->tstamp;
- fq->meat += skb->len;
- atomic_add(skb->truesize, &ip6_frag_mem);
+ dev = skb->dev;
+ if (dev) {
+ fq->iif = dev->ifindex;
+ skb->dev = NULL;
+ }
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+ atomic_add(skb->truesize, &ip6_frags.mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->last_in |= FIRST_IN;
+ fq->q.last_in |= FIRST_IN;
}
- write_lock(&ip6_frag_lock);
- list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
- write_unlock(&ip6_frag_lock);
- return;
+
+ if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
+ return ip6_frag_reasm(fq, prev, dev);
+
+ write_lock(&ip6_frags.lock);
+ list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+ write_unlock(&ip6_frags.lock);
+ return -1;
err:
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
+ return -1;
}
/*
@@ -597,21 +507,39 @@
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev)
{
- struct sk_buff *fp, *head = fq->fragments;
+ struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
unsigned int nhoff;
fq_kill(fq);
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_oom;
+
+ fp->next = head->next;
+ prev->next = fp;
+
+ skb_morph(head, fq->q.fragments);
+ head->next = fq->q.fragments->next;
+
+ kfree_skb(fq->q.fragments);
+ fq->q.fragments = head;
+ }
+
BUG_TRAP(head != NULL);
BUG_TRAP(FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
- sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
@@ -640,7 +568,7 @@
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip6_frag_mem);
+ atomic_add(clone->truesize, &ip6_frags.mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -655,7 +583,7 @@
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip6_frag_mem);
+ atomic_sub(head->truesize, &ip6_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -665,17 +593,15 @@
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip6_frag_mem);
+ atomic_sub(fp->truesize, &ip6_frags.mem);
}
head->next = NULL;
head->dev = dev;
- head->tstamp = fq->stamp;
+ head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
IP6CB(head)->nhoff = nhoff;
- *skb_in = head;
-
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_partial(skb_network_header(head),
@@ -685,7 +611,7 @@
rcu_read_lock();
IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
- fq->fragments = NULL;
+ fq->q.fragments = NULL;
return 1;
out_oversize:
@@ -702,10 +628,8 @@
return -1;
}
-static int ipv6_frag_rcv(struct sk_buff **skbp)
+static int ipv6_frag_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
- struct net_device *dev = skb->dev;
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -739,23 +663,19 @@
return 1;
}
- if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+ if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
ip6_evictor(ip6_dst_idev(skb->dst));
if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_dst_idev(skb->dst))) != NULL) {
- int ret = -1;
+ int ret;
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
- ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
- if (fq->last_in == (FIRST_IN|LAST_IN) &&
- fq->meat == fq->len)
- ret = ip6_frag_reasm(fq, skbp, dev);
-
- spin_unlock(&fq->lock);
- fq_put(fq, NULL);
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
return ret;
}
@@ -775,11 +695,10 @@
if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
- ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
- init_timer(&ip6_frag_secret_timer);
- ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
- ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
- add_timer(&ip6_frag_secret_timer);
+ ip6_frags.ctl = &ip6_frags_ctl;
+ ip6_frags.hashfn = ip6_hashfn;
+ ip6_frags.destructor = ip6_frag_free;
+ ip6_frags.skb_free = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ inet_frags_init(&ip6_frags);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6ff19f9..cce9941 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -663,7 +663,7 @@
return rt;
}
-static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
@@ -682,7 +682,7 @@
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
- rt = rt6_select(fn, fl->iif, strict | reachable);
+ rt = rt6_select(fn, oif, strict | reachable);
BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -735,6 +735,12 @@
return rt;
}
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+ struct flowi *fl, int flags)
+{
+ return ip6_pol_route(table, fl->iif, fl, flags);
+}
+
void ip6_route_input(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -761,72 +767,7 @@
static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
struct flowi *fl, int flags)
{
- struct fib6_node *fn;
- struct rt6_info *rt, *nrt;
- int strict = 0;
- int attempts = 3;
- int err;
- int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
-
- strict |= flags & RT6_LOOKUP_F_IFACE;
-
-relookup:
- read_lock_bh(&table->tb6_lock);
-
-restart_2:
- fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
-
-restart:
- rt = rt6_select(fn, fl->oif, strict | reachable);
- BACKTRACK(&fl->fl6_src);
- if (rt == &ip6_null_entry ||
- rt->rt6i_flags & RTF_CACHE)
- goto out;
-
- dst_hold(&rt->u.dst);
- read_unlock_bh(&table->tb6_lock);
-
- if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
- else {
-#if CLONE_OFFLINK_ROUTE
- nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
- goto out2;
-#endif
- }
-
- dst_release(&rt->u.dst);
- rt = nrt ? : &ip6_null_entry;
-
- dst_hold(&rt->u.dst);
- if (nrt) {
- err = ip6_ins_rt(nrt);
- if (!err)
- goto out2;
- }
-
- if (--attempts <= 0)
- goto out2;
-
- /*
- * Race condition! In the gap, when table->tb6_lock was
- * released someone could insert this route. Relookup.
- */
- dst_release(&rt->u.dst);
- goto relookup;
-
-out:
- if (reachable) {
- reachable = 0;
- goto restart_2;
- }
- dst_hold(&rt->u.dst);
- read_unlock_bh(&table->tb6_lock);
-out2:
- rt->u.dst.lastuse = jiffies;
- rt->u.dst.__use++;
- return rt;
+ return ip6_pol_route(table, fl->oif, fl, flags);
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3fb4427..68bb254 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -12,6 +12,7 @@
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
+#include <net/inet_frag.h>
#ifdef CONFIG_SYSCTL
@@ -41,7 +42,7 @@
{
.ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
.procname = "ip6frag_high_thresh",
- .data = &sysctl_ip6frag_high_thresh,
+ .data = &ip6_frags_ctl.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -49,7 +50,7 @@
{
.ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
.procname = "ip6frag_low_thresh",
- .data = &sysctl_ip6frag_low_thresh,
+ .data = &ip6_frags_ctl.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -57,7 +58,7 @@
{
.ctl_name = NET_IPV6_IP6FRAG_TIME,
.procname = "ip6frag_time",
- .data = &sysctl_ip6frag_time,
+ .data = &ip6_frags_ctl.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -66,7 +67,7 @@
{
.ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
.procname = "ip6frag_secret_interval",
- .data = &sysctl_ip6frag_secret_interval,
+ .data = &ip6_frags_ctl.secret_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a07b59c..737b755 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1668,9 +1668,8 @@
return 0;
}
-static int tcp_v6_rcv(struct sk_buff **pskb)
+static int tcp_v6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct tcphdr *th;
struct sock *sk;
int ret;
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 23e2809..6323921 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -87,9 +87,8 @@
EXPORT_SYMBOL(xfrm6_tunnel_deregister);
-static int tunnel6_rcv(struct sk_buff **pskb)
+static int tunnel6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct xfrm6_tunnel *handler;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -106,9 +105,8 @@
return 0;
}
-static int tunnel46_rcv(struct sk_buff **pskb)
+static int tunnel46_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct xfrm6_tunnel *handler;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 82ff26d..caebad6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -405,10 +405,9 @@
return 0;
}
-int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
+int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
int proto)
{
- struct sk_buff *skb = *pskb;
struct sock *sk;
struct udphdr *uh;
struct net_device *dev = skb->dev;
@@ -494,9 +493,9 @@
return 0;
}
-static __inline__ int udpv6_rcv(struct sk_buff **pskb)
+static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
- return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
+ return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
/*
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 6e252f3..2d3fda60 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -6,7 +6,7 @@
#include <net/addrconf.h>
#include <net/inet_common.h>
-extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
+extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
int , int , int , __be32 , struct hlist_head []);
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f54016a..766566f 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,9 +17,9 @@
DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
-static int udplitev6_rcv(struct sk_buff **pskb)
+static int udplitev6_rcv(struct sk_buff *skb)
{
- return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
+ return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
}
static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index c858537..02f69e5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -133,9 +133,9 @@
EXPORT_SYMBOL(xfrm6_rcv_spi);
-int xfrm6_rcv(struct sk_buff **pskb)
+int xfrm6_rcv(struct sk_buff *skb)
{
- return xfrm6_rcv_spi(*pskb, 0);
+ return xfrm6_rcv_spi(skb, 0);
}
EXPORT_SYMBOL(xfrm6_rcv);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4618c18..a5a32c1 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -80,7 +80,7 @@
while (likely((err = xfrm6_output_one(skb)) == 0)) {
nf_reset(skb);
- err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+ err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
if (unlikely(err != 1))
break;
@@ -88,7 +88,7 @@
if (!skb->dst->xfrm)
return dst_output(skb);
- err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+ err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL,
skb->dst->dev, xfrm6_output_finish2);
if (unlikely(err != 1))
break;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a523fa4..bed9ba0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -117,7 +117,7 @@
EXPORT_SYMBOL(nf_unregister_hooks);
unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
+ struct sk_buff *skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
@@ -160,7 +160,7 @@
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
@@ -175,17 +175,17 @@
elem = &nf_hooks[pf][hook];
next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+ verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
outdev, &elem, okfn, hook_thresh);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
goto unlock;
} else if (verdict == NF_DROP) {
- kfree_skb(*pskb);
+ kfree_skb(skb);
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
- if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
+ if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS))
goto next_hook;
}
@@ -196,34 +196,24 @@
EXPORT_SYMBOL(nf_hook_slow);
-int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
- struct sk_buff *nskb;
-
- if (writable_len > (*pskb)->len)
+ if (writable_len > skb->len)
return 0;
/* Not exclusive use of packet? Must copy. */
- if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
- goto copy_skb;
- if (skb_shared(*pskb))
- goto copy_skb;
+ if (!skb_cloned(skb)) {
+ if (writable_len <= skb_headlen(skb))
+ return 1;
+ } else if (skb_clone_writable(skb, writable_len))
+ return 1;
- return pskb_may_pull(*pskb, writable_len);
+ if (writable_len <= skb_headlen(skb))
+ writable_len = 0;
+ else
+ writable_len -= skb_headlen(skb);
-copy_skb:
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return 0;
- BUG_ON(skb_is_nonlinear(nskb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
+ return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index e42ab23..7b8239c 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -36,7 +36,7 @@
module_param(ts_algo, charp, 0400);
MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
-unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -79,7 +79,7 @@
},
};
-static int amanda_help(struct sk_buff **pskb,
+static int amanda_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -101,25 +101,25 @@
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
- nf_ct_refresh(ct, *pskb, master_timeout * HZ);
+ nf_ct_refresh(ct, skb, master_timeout * HZ);
/* No data? */
dataoff = protoff + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
+ if (dataoff >= skb->len) {
if (net_ratelimit())
- printk("amanda_help: skblen = %u\n", (*pskb)->len);
+ printk("amanda_help: skblen = %u\n", skb->len);
return NF_ACCEPT;
}
memset(&ts, 0, sizeof(ts));
- start = skb_find_text(*pskb, dataoff, (*pskb)->len,
+ start = skb_find_text(skb, dataoff, skb->len,
search[SEARCH_CONNECT].ts, &ts);
if (start == UINT_MAX)
goto out;
start += dataoff + search[SEARCH_CONNECT].len;
memset(&ts, 0, sizeof(ts));
- stop = skb_find_text(*pskb, start, (*pskb)->len,
+ stop = skb_find_text(skb, start, skb->len,
search[SEARCH_NEWLINE].ts, &ts);
if (stop == UINT_MAX)
goto out;
@@ -127,13 +127,13 @@
for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
memset(&ts, 0, sizeof(ts));
- off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
+ off = skb_find_text(skb, start, stop, search[i].ts, &ts);
if (off == UINT_MAX)
continue;
off += start + search[i].len;
len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
- if (skb_copy_bits(*pskb, off, pbuf, len))
+ if (skb_copy_bits(skb, off, pbuf, len))
break;
pbuf[len] = '\0';
@@ -153,7 +153,7 @@
nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
- ret = nf_nat_amanda(pskb, ctinfo, off - dataoff,
+ ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
len, exp);
else if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 83c30b4..4d6171b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -307,7 +307,7 @@
/* Confirm a connection given skb; places it in hash table */
int
-__nf_conntrack_confirm(struct sk_buff **pskb)
+__nf_conntrack_confirm(struct sk_buff *skb)
{
unsigned int hash, repl_hash;
struct nf_conntrack_tuple_hash *h;
@@ -316,7 +316,7 @@
struct hlist_node *n;
enum ip_conntrack_info ctinfo;
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* ipt_REJECT uses nf_conntrack_attach to attach related
ICMP/TCP RST packets in other direction. Actual packet
@@ -367,14 +367,14 @@
write_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
- nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+ nf_conntrack_event_cache(IPCT_HELPER, skb);
#ifdef CONFIG_NF_NAT_NEEDED
if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+ nf_conntrack_event_cache(IPCT_NATINFO, skb);
#endif
nf_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
+ IPCT_RELATED : IPCT_NEW, skb);
return NF_ACCEPT;
out:
@@ -632,7 +632,7 @@
}
unsigned int
-nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
+nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -644,14 +644,14 @@
int ret;
/* Previously seen (loopback or untracked)? Ignore. */
- if ((*pskb)->nfct) {
+ if (skb->nfct) {
NF_CT_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_slow */
l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
- ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb),
+ ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
if (ret <= 0) {
pr_debug("not prepared to track yet or error occured\n");
@@ -666,13 +666,13 @@
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
if (l4proto->error != NULL &&
- (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+ (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
NF_CT_STAT_INC_ATOMIC(error);
NF_CT_STAT_INC_ATOMIC(invalid);
return -ret;
}
- ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto,
+ ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
&set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
@@ -686,21 +686,21 @@
return NF_DROP;
}
- NF_CT_ASSERT((*pskb)->nfct);
+ NF_CT_ASSERT(skb->nfct);
- ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
if (ret < 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
- nf_conntrack_put((*pskb)->nfct);
- (*pskb)->nfct = NULL;
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = NULL;
NF_CT_STAT_INC_ATOMIC(invalid);
return -ret;
}
if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, *pskb);
+ nf_conntrack_event_cache(IPCT_STATUS, skb);
return ret;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index c763ee7..6df2590 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -43,7 +43,7 @@
static int loose;
module_param(loose, bool, 0600);
-unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
unsigned int matchoff,
@@ -344,7 +344,7 @@
}
}
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -371,21 +371,21 @@
return NF_ACCEPT;
}
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
dataoff = protoff + th->doff * 4;
/* No data? */
- if (dataoff >= (*pskb)->len) {
+ if (dataoff >= skb->len) {
pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
- (*pskb)->len);
+ skb->len);
return NF_ACCEPT;
}
- datalen = (*pskb)->len - dataoff;
+ datalen = skb->len - dataoff;
spin_lock_bh(&nf_ftp_lock);
- fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
+ fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
BUG_ON(fb_ptr == NULL);
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
@@ -491,7 +491,7 @@
* (possibly changed) expectation itself. */
nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
- ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
+ ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
@@ -508,7 +508,7 @@
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info, dir, *pskb);
+ update_nl_seq(seq, ct_ftp_info, dir, skb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a8a9dfb..f23fd95 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -47,27 +47,27 @@
"(determined by routing information)");
/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff **pskb,
+int (*set_h245_addr_hook) (struct sk_buff *skb,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
__read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff **pskb,
+int (*set_h225_addr_hook) (struct sk_buff *skb,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
__read_mostly;
-int (*set_sig_addr_hook) (struct sk_buff **pskb,
+int (*set_sig_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count) __read_mostly;
-int (*set_ras_addr_hook) (struct sk_buff **pskb,
+int (*set_ras_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count) __read_mostly;
-int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -75,25 +75,25 @@
__be16 port, __be16 rtp_port,
struct nf_conntrack_expect *rtp_exp,
struct nf_conntrack_expect *rtcp_exp) __read_mostly;
-int (*nat_t120_hook) (struct sk_buff **pskb,
+int (*nat_t120_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_h245_hook) (struct sk_buff **pskb,
+int (*nat_h245_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+int (*nat_callforwarding_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_q931_hook) (struct sk_buff **pskb,
+int (*nat_q931_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, TransportAddress *taddr, int idx,
@@ -108,7 +108,7 @@
static struct nf_conntrack_helper nf_conntrack_helper_ras[];
/****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
+static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
unsigned char **data, int *datalen, int *dataoff)
{
@@ -122,7 +122,7 @@
int tpktoff;
/* Get TCP header */
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
@@ -130,13 +130,13 @@
tcpdataoff = protoff + th->doff * 4;
/* Get TCP data length */
- tcpdatalen = (*pskb)->len - tcpdataoff;
+ tcpdatalen = skb->len - tcpdataoff;
if (tcpdatalen <= 0) /* No TCP data */
goto clear_out;
if (*data == NULL) { /* first TPKT */
/* Get first TPKT pointer */
- tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
+ tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
h323_buffer);
BUG_ON(tpkt == NULL);
@@ -248,7 +248,7 @@
}
/****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr)
@@ -297,7 +297,7 @@
(nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -321,7 +321,7 @@
}
/****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
+static int expect_t120(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -355,7 +355,7 @@
(nat_t120 = rcu_dereference(nat_t120_hook)) &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr,
+ ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -371,7 +371,7 @@
}
/****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
+static int process_h245_channel(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -381,7 +381,7 @@
if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
/* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&channel->mediaChannel);
if (ret < 0)
return -1;
@@ -390,7 +390,7 @@
if (channel->
options & eH2250LogicalChannelParameters_mediaControlChannel) {
/* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&channel->mediaControlChannel);
if (ret < 0)
return -1;
@@ -400,7 +400,7 @@
}
/****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
OpenLogicalChannel *olc)
@@ -412,7 +412,7 @@
if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
{
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
&olc->
forwardLogicalChannelParameters.
multiplexParameters.
@@ -430,7 +430,7 @@
eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
{
ret =
- process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ process_h245_channel(skb, ct, ctinfo, data, dataoff,
&olc->
reverseLogicalChannelParameters.
multiplexParameters.
@@ -448,7 +448,7 @@
t120.choice == eDataProtocolCapability_separateLANStack &&
olc->separateStack.networkAddress.choice ==
eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_t120(skb, ct, ctinfo, data, dataoff,
&olc->separateStack.networkAddress.
localAreaAddress);
if (ret < 0)
@@ -459,7 +459,7 @@
}
/****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
OpenLogicalChannelAck *olca)
@@ -477,7 +477,7 @@
choice ==
eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
{
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
&olca->
reverseLogicalChannelParameters.
multiplexParameters.
@@ -496,7 +496,7 @@
if (ack->options &
eH2250LogicalChannelAckParameters_mediaChannel) {
/* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&ack->mediaChannel);
if (ret < 0)
return -1;
@@ -505,7 +505,7 @@
if (ack->options &
eH2250LogicalChannelAckParameters_mediaControlChannel) {
/* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&ack->mediaControlChannel);
if (ret < 0)
return -1;
@@ -515,7 +515,7 @@
if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
olca->separateStack.networkAddress.choice ==
eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_t120(skb, ct, ctinfo, data, dataoff,
&olca->separateStack.networkAddress.
localAreaAddress);
if (ret < 0)
@@ -526,7 +526,7 @@
}
/****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
MultimediaSystemControlMessage *mscm)
@@ -535,7 +535,7 @@
case eMultimediaSystemControlMessage_request:
if (mscm->request.choice ==
eRequestMessage_openLogicalChannel) {
- return process_olc(pskb, ct, ctinfo, data, dataoff,
+ return process_olc(skb, ct, ctinfo, data, dataoff,
&mscm->request.openLogicalChannel);
}
pr_debug("nf_ct_h323: H.245 Request %d\n",
@@ -544,7 +544,7 @@
case eMultimediaSystemControlMessage_response:
if (mscm->response.choice ==
eResponseMessage_openLogicalChannelAck) {
- return process_olca(pskb, ct, ctinfo, data, dataoff,
+ return process_olca(skb, ct, ctinfo, data, dataoff,
&mscm->response.
openLogicalChannelAck);
}
@@ -560,7 +560,7 @@
}
/****************************************************************************/
-static int h245_help(struct sk_buff **pskb, unsigned int protoff,
+static int h245_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
static MultimediaSystemControlMessage mscm;
@@ -574,12 +574,12 @@
ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
return NF_ACCEPT;
}
- pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len);
+ pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
spin_lock_bh(&nf_h323_lock);
/* Process each TPKT */
- while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+ while (get_tpkt_data(skb, protoff, ct, ctinfo,
&data, &datalen, &dataoff)) {
pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -596,7 +596,7 @@
}
/* Process H.245 signal */
- if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+ if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0)
goto drop;
}
@@ -654,7 +654,7 @@
}
/****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr)
@@ -687,7 +687,7 @@
(nat_h245 = rcu_dereference(nat_h245_hook)) &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr,
+ ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -758,7 +758,7 @@
}
/****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
+static int expect_callforwarding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -798,7 +798,7 @@
(nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
ct->status & IPS_NAT_MASK) {
/* Need NAT */
- ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
+ ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
taddr, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -814,7 +814,7 @@
}
/****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Setup_UUIE *setup)
@@ -829,7 +829,7 @@
pr_debug("nf_ct_q931: Setup\n");
if (setup->options & eSetup_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&setup->h245Address);
if (ret < 0)
return -1;
@@ -846,7 +846,7 @@
NIP6(*(struct in6_addr *)&addr), ntohs(port),
NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
+ ret = set_h225_addr(skb, data, dataoff,
&setup->destCallSignalAddress,
&ct->tuplehash[!dir].tuple.src.u3,
ct->tuplehash[!dir].tuple.src.u.tcp.port);
@@ -864,7 +864,7 @@
NIP6(*(struct in6_addr *)&addr), ntohs(port),
NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
+ ret = set_h225_addr(skb, data, dataoff,
&setup->sourceCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.dst.u.tcp.port);
@@ -874,7 +874,7 @@
if (setup->options & eSetup_UUIE_fastStart) {
for (i = 0; i < setup->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&setup->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -885,7 +885,7 @@
}
/****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
+static int process_callproceeding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -897,7 +897,7 @@
pr_debug("nf_ct_q931: CallProceeding\n");
if (callproc->options & eCallProceeding_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&callproc->h245Address);
if (ret < 0)
return -1;
@@ -905,7 +905,7 @@
if (callproc->options & eCallProceeding_UUIE_fastStart) {
for (i = 0; i < callproc->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&callproc->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -916,7 +916,7 @@
}
/****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Connect_UUIE *connect)
@@ -927,7 +927,7 @@
pr_debug("nf_ct_q931: Connect\n");
if (connect->options & eConnect_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&connect->h245Address);
if (ret < 0)
return -1;
@@ -935,7 +935,7 @@
if (connect->options & eConnect_UUIE_fastStart) {
for (i = 0; i < connect->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&connect->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -946,7 +946,7 @@
}
/****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Alerting_UUIE *alert)
@@ -957,7 +957,7 @@
pr_debug("nf_ct_q931: Alerting\n");
if (alert->options & eAlerting_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&alert->h245Address);
if (ret < 0)
return -1;
@@ -965,7 +965,7 @@
if (alert->options & eAlerting_UUIE_fastStart) {
for (i = 0; i < alert->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&alert->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -976,7 +976,7 @@
}
/****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Facility_UUIE *facility)
@@ -988,7 +988,7 @@
if (facility->reason.choice == eFacilityReason_callForwarded) {
if (facility->options & eFacility_UUIE_alternativeAddress)
- return expect_callforwarding(pskb, ct, ctinfo, data,
+ return expect_callforwarding(skb, ct, ctinfo, data,
dataoff,
&facility->
alternativeAddress);
@@ -996,7 +996,7 @@
}
if (facility->options & eFacility_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&facility->h245Address);
if (ret < 0)
return -1;
@@ -1004,7 +1004,7 @@
if (facility->options & eFacility_UUIE_fastStart) {
for (i = 0; i < facility->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&facility->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -1015,7 +1015,7 @@
}
/****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Progress_UUIE *progress)
@@ -1026,7 +1026,7 @@
pr_debug("nf_ct_q931: Progress\n");
if (progress->options & eProgress_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&progress->h245Address);
if (ret < 0)
return -1;
@@ -1034,7 +1034,7 @@
if (progress->options & eProgress_UUIE_fastStart) {
for (i = 0; i < progress->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&progress->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -1045,7 +1045,7 @@
}
/****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff, Q931 *q931)
{
@@ -1055,28 +1055,28 @@
switch (pdu->h323_message_body.choice) {
case eH323_UU_PDU_h323_message_body_setup:
- ret = process_setup(pskb, ct, ctinfo, data, dataoff,
+ ret = process_setup(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.setup);
break;
case eH323_UU_PDU_h323_message_body_callProceeding:
- ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
+ ret = process_callproceeding(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.
callProceeding);
break;
case eH323_UU_PDU_h323_message_body_connect:
- ret = process_connect(pskb, ct, ctinfo, data, dataoff,
+ ret = process_connect(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.connect);
break;
case eH323_UU_PDU_h323_message_body_alerting:
- ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
+ ret = process_alerting(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.alerting);
break;
case eH323_UU_PDU_h323_message_body_facility:
- ret = process_facility(pskb, ct, ctinfo, data, dataoff,
+ ret = process_facility(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.facility);
break;
case eH323_UU_PDU_h323_message_body_progress:
- ret = process_progress(pskb, ct, ctinfo, data, dataoff,
+ ret = process_progress(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.progress);
break;
default:
@@ -1090,7 +1090,7 @@
if (pdu->options & eH323_UU_PDU_h245Control) {
for (i = 0; i < pdu->h245Control.count; i++) {
- ret = process_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = process_h245(skb, ct, ctinfo, data, dataoff,
&pdu->h245Control.item[i]);
if (ret < 0)
return -1;
@@ -1101,7 +1101,7 @@
}
/****************************************************************************/
-static int q931_help(struct sk_buff **pskb, unsigned int protoff,
+static int q931_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
static Q931 q931;
@@ -1115,12 +1115,12 @@
ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
return NF_ACCEPT;
}
- pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len);
+ pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
spin_lock_bh(&nf_h323_lock);
/* Process each TPKT */
- while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+ while (get_tpkt_data(skb, protoff, ct, ctinfo,
&data, &datalen, &dataoff)) {
pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -1136,7 +1136,7 @@
}
/* Process Q.931 signal */
- if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
+ if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0)
goto drop;
}
@@ -1177,20 +1177,20 @@
};
/****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff,
+static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
int *datalen)
{
struct udphdr _uh, *uh;
int dataoff;
- uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh);
+ uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh);
if (uh == NULL)
return NULL;
dataoff = protoff + sizeof(_uh);
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NULL;
- *datalen = (*pskb)->len - dataoff;
- return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
+ *datalen = skb->len - dataoff;
+ return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
}
/****************************************************************************/
@@ -1227,7 +1227,7 @@
}
/****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count)
@@ -1265,7 +1265,7 @@
nat_q931 = rcu_dereference(nat_q931_hook);
if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */
- ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
+ ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1283,7 +1283,7 @@
}
/****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, GatekeeperRequest *grq)
{
@@ -1293,13 +1293,13 @@
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */
- return set_ras_addr(pskb, ct, ctinfo, data,
+ return set_ras_addr(skb, ct, ctinfo, data,
&grq->rasAddress, 1);
return 0;
}
/****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, GatekeeperConfirm *gcf)
{
@@ -1343,7 +1343,7 @@
}
/****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, RegistrationRequest *rrq)
{
@@ -1353,7 +1353,7 @@
pr_debug("nf_ct_ras: RRQ\n");
- ret = expect_q931(pskb, ct, ctinfo, data,
+ ret = expect_q931(skb, ct, ctinfo, data,
rrq->callSignalAddress.item,
rrq->callSignalAddress.count);
if (ret < 0)
@@ -1361,7 +1361,7 @@
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
+ ret = set_ras_addr(skb, ct, ctinfo, data,
rrq->rasAddress.item,
rrq->rasAddress.count);
if (ret < 0)
@@ -1378,7 +1378,7 @@
}
/****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, RegistrationConfirm *rcf)
{
@@ -1392,7 +1392,7 @@
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
+ ret = set_sig_addr(skb, ct, ctinfo, data,
rcf->callSignalAddress.item,
rcf->callSignalAddress.count);
if (ret < 0)
@@ -1407,7 +1407,7 @@
if (info->timeout > 0) {
pr_debug("nf_ct_ras: set RAS connection timeout to "
"%u seconds\n", info->timeout);
- nf_ct_refresh(ct, *pskb, info->timeout * HZ);
+ nf_ct_refresh(ct, skb, info->timeout * HZ);
/* Set expect timeout */
read_lock_bh(&nf_conntrack_lock);
@@ -1427,7 +1427,7 @@
}
/****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, UnregistrationRequest *urq)
{
@@ -1440,7 +1440,7 @@
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
+ ret = set_sig_addr(skb, ct, ctinfo, data,
urq->callSignalAddress.item,
urq->callSignalAddress.count);
if (ret < 0)
@@ -1453,13 +1453,13 @@
info->sig_port[!dir] = 0;
/* Give it 30 seconds for UCF or URJ */
- nf_ct_refresh(ct, *pskb, 30 * HZ);
+ nf_ct_refresh(ct, skb, 30 * HZ);
return 0;
}
/****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, AdmissionRequest *arq)
{
@@ -1479,7 +1479,7 @@
port == info->sig_port[dir] &&
set_h225_addr && ct->status & IPS_NAT_MASK) {
/* Answering ARQ */
- return set_h225_addr(pskb, data, 0,
+ return set_h225_addr(skb, data, 0,
&arq->destCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
@@ -1491,7 +1491,7 @@
!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
set_h225_addr && ct->status & IPS_NAT_MASK) {
/* Calling ARQ */
- return set_h225_addr(pskb, data, 0,
+ return set_h225_addr(skb, data, 0,
&arq->srcCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
port);
@@ -1501,7 +1501,7 @@
}
/****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, AdmissionConfirm *acf)
{
@@ -1522,7 +1522,7 @@
/* Answering ACF */
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK)
- return set_sig_addr(pskb, ct, ctinfo, data,
+ return set_sig_addr(skb, ct, ctinfo, data,
&acf->destCallSignalAddress, 1);
return 0;
}
@@ -1548,7 +1548,7 @@
}
/****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, LocationRequest *lrq)
{
@@ -1558,13 +1558,13 @@
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK)
- return set_ras_addr(pskb, ct, ctinfo, data,
+ return set_ras_addr(skb, ct, ctinfo, data,
&lrq->replyAddress, 1);
return 0;
}
/****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, LocationConfirm *lcf)
{
@@ -1603,7 +1603,7 @@
}
/****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, InfoRequestResponse *irr)
{
@@ -1615,7 +1615,7 @@
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
+ ret = set_ras_addr(skb, ct, ctinfo, data,
&irr->rasAddress, 1);
if (ret < 0)
return -1;
@@ -1623,7 +1623,7 @@
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
+ ret = set_sig_addr(skb, ct, ctinfo, data,
irr->callSignalAddress.item,
irr->callSignalAddress.count);
if (ret < 0)
@@ -1634,40 +1634,40 @@
}
/****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, RasMessage *ras)
{
switch (ras->choice) {
case eRasMessage_gatekeeperRequest:
- return process_grq(pskb, ct, ctinfo, data,
+ return process_grq(skb, ct, ctinfo, data,
&ras->gatekeeperRequest);
case eRasMessage_gatekeeperConfirm:
- return process_gcf(pskb, ct, ctinfo, data,
+ return process_gcf(skb, ct, ctinfo, data,
&ras->gatekeeperConfirm);
case eRasMessage_registrationRequest:
- return process_rrq(pskb, ct, ctinfo, data,
+ return process_rrq(skb, ct, ctinfo, data,
&ras->registrationRequest);
case eRasMessage_registrationConfirm:
- return process_rcf(pskb, ct, ctinfo, data,
+ return process_rcf(skb, ct, ctinfo, data,
&ras->registrationConfirm);
case eRasMessage_unregistrationRequest:
- return process_urq(pskb, ct, ctinfo, data,
+ return process_urq(skb, ct, ctinfo, data,
&ras->unregistrationRequest);
case eRasMessage_admissionRequest:
- return process_arq(pskb, ct, ctinfo, data,
+ return process_arq(skb, ct, ctinfo, data,
&ras->admissionRequest);
case eRasMessage_admissionConfirm:
- return process_acf(pskb, ct, ctinfo, data,
+ return process_acf(skb, ct, ctinfo, data,
&ras->admissionConfirm);
case eRasMessage_locationRequest:
- return process_lrq(pskb, ct, ctinfo, data,
+ return process_lrq(skb, ct, ctinfo, data,
&ras->locationRequest);
case eRasMessage_locationConfirm:
- return process_lcf(pskb, ct, ctinfo, data,
+ return process_lcf(skb, ct, ctinfo, data,
&ras->locationConfirm);
case eRasMessage_infoRequestResponse:
- return process_irr(pskb, ct, ctinfo, data,
+ return process_irr(skb, ct, ctinfo, data,
&ras->infoRequestResponse);
default:
pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
@@ -1678,7 +1678,7 @@
}
/****************************************************************************/
-static int ras_help(struct sk_buff **pskb, unsigned int protoff,
+static int ras_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
static RasMessage ras;
@@ -1686,12 +1686,12 @@
int datalen = 0;
int ret;
- pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len);
+ pr_debug("nf_ct_ras: skblen = %u\n", skb->len);
spin_lock_bh(&nf_h323_lock);
/* Get UDP data */
- data = get_udp_data(pskb, protoff, &datalen);
+ data = get_udp_data(skb, protoff, &datalen);
if (data == NULL)
goto accept;
pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
@@ -1707,7 +1707,7 @@
}
/* Process RAS message */
- if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
+ if (process_ras(skb, ct, ctinfo, &data, &ras) < 0)
goto drop;
accept:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1562ca9..dfaed4b 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -30,7 +30,7 @@
static char *irc_buffer;
static DEFINE_SPINLOCK(irc_buffer_lock);
-unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -89,7 +89,7 @@
return 0;
}
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
unsigned int dataoff;
@@ -116,22 +116,22 @@
return NF_ACCEPT;
/* Not a full tcp header? */
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
/* No data? */
dataoff = protoff + th->doff*4;
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NF_ACCEPT;
spin_lock_bh(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff,
+ ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
irc_buffer);
BUG_ON(ib_ptr == NULL);
data = ib_ptr;
- data_limit = ib_ptr + (*pskb)->len - dataoff;
+ data_limit = ib_ptr + skb->len - dataoff;
/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
* 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -143,7 +143,7 @@
data += 5;
/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n",
NIPQUAD(iph->saddr), ntohs(th->source),
NIPQUAD(iph->daddr), ntohs(th->dest));
@@ -193,7 +193,7 @@
nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
if (nf_nat_irc && ct->status & IPS_NAT_MASK)
- ret = nf_nat_irc(pskb, ctinfo,
+ ret = nf_nat_irc(skb, ctinfo,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 1d59fab..9810d81 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -42,17 +42,17 @@
module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nf_conntrack_expect *exp;
- struct iphdr *iph = ip_hdr(*pskb);
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = (struct rtable *)skb->dst;
struct in_device *in_dev;
__be32 mask = 0;
/* we're only interested in locally generated packets */
- if ((*pskb)->sk == NULL)
+ if (skb->sk == NULL)
goto out;
if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
goto out;
@@ -91,7 +91,7 @@
nf_ct_expect_related(exp);
nf_ct_expect_put(exp);
- nf_ct_refresh(ct, *pskb, timeout * HZ);
+ nf_ct_refresh(ct, skb, timeout * HZ);
out:
return NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b080419..099b6df 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -41,14 +41,14 @@
static DEFINE_SPINLOCK(nf_pptp_lock);
int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq) __read_mostly;
@@ -254,7 +254,7 @@
}
static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
unsigned int reqlen,
@@ -367,7 +367,7 @@
nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
+ return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -380,7 +380,7 @@
}
static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
unsigned int reqlen,
@@ -462,7 +462,7 @@
nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
+ return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -492,7 +492,7 @@
/* track caller id inside control connection, call expect_related */
static int
-conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
+conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -502,7 +502,7 @@
struct pptp_pkt_hdr _pptph, *pptph;
struct PptpControlHeader _ctlh, *ctlh;
union pptp_ctrl_union _pptpReq, *pptpReq;
- unsigned int tcplen = (*pskb)->len - protoff;
+ unsigned int tcplen = skb->len - protoff;
unsigned int datalen, reqlen, nexthdr_off;
int oldsstate, oldcstate;
int ret;
@@ -514,12 +514,12 @@
return NF_ACCEPT;
nexthdr_off = protoff;
- tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+ tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
BUG_ON(!tcph);
nexthdr_off += tcph->doff * 4;
datalen = tcplen - tcph->doff * 4;
- pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+ pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph);
if (!pptph) {
pr_debug("no full PPTP header, can't track\n");
return NF_ACCEPT;
@@ -534,7 +534,7 @@
return NF_ACCEPT;
}
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh);
if (!ctlh)
return NF_ACCEPT;
nexthdr_off += sizeof(_ctlh);
@@ -547,7 +547,7 @@
if (reqlen > sizeof(*pptpReq))
reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq);
if (!pptpReq)
return NF_ACCEPT;
@@ -560,11 +560,11 @@
* established from PNS->PAC. However, RFC makes no guarantee */
if (dir == IP_CT_DIR_ORIGINAL)
/* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+ ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
ctinfo);
else
/* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+ ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
ctinfo);
pr_debug("sstate: %d->%d, cstate: %d->%d\n",
oldsstate, info->sstate, oldcstate, info->cstate);
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 355d371..b5a16c6 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -56,7 +56,7 @@
/* other fields aren't interesting for conntrack */
};
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -80,19 +80,19 @@
return NF_ACCEPT;
/* Not a full tcp header? */
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
/* No data? */
dataoff = protoff + th->doff * 4;
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NF_ACCEPT;
- datalen = (*pskb)->len - dataoff;
+ datalen = skb->len - dataoff;
spin_lock_bh(&nf_sane_lock);
- sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer);
+ sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
BUG_ON(sb_ptr == NULL);
if (dir == IP_CT_DIR_ORIGINAL) {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d449fa4..8f8b5a4 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -36,13 +36,13 @@
module_param(sip_timeout, uint, 0600);
MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
-unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp,
const char *dptr) __read_mostly;
@@ -363,7 +363,7 @@
}
EXPORT_SYMBOL_GPL(ct_sip_get_info);
-static int set_expected_rtp(struct sk_buff **pskb,
+static int set_expected_rtp(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
union nf_conntrack_address *addr,
@@ -385,7 +385,7 @@
nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook);
if (nf_nat_sdp && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sdp(pskb, ctinfo, exp, dptr);
+ ret = nf_nat_sdp(skb, ctinfo, exp, dptr);
else {
if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
@@ -397,7 +397,7 @@
return ret;
}
-static int sip_help(struct sk_buff **pskb,
+static int sip_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -414,13 +414,13 @@
/* No Data ? */
dataoff = protoff + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NF_ACCEPT;
- nf_ct_refresh(ct, *pskb, sip_timeout * HZ);
+ nf_ct_refresh(ct, skb, sip_timeout * HZ);
- if (!skb_is_nonlinear(*pskb))
- dptr = (*pskb)->data + dataoff;
+ if (!skb_is_nonlinear(skb))
+ dptr = skb->data + dataoff;
else {
pr_debug("Copy of skbuff not supported yet.\n");
goto out;
@@ -428,13 +428,13 @@
nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
if (nf_nat_sip && ct->status & IPS_NAT_MASK) {
- if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) {
+ if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) {
ret = NF_DROP;
goto out;
}
}
- datalen = (*pskb)->len - dataoff;
+ datalen = skb->len - dataoff;
if (datalen < sizeof("SIP/2.0 200") - 1)
goto out;
@@ -464,7 +464,7 @@
ret = NF_DROP;
goto out;
}
- ret = set_expected_rtp(pskb, ct, ctinfo, &addr,
+ ret = set_expected_rtp(skb, ct, ctinfo, &addr,
htons(port), dptr);
}
}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index cc19506..e894aa1f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -29,12 +29,12 @@
module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
-unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
-static int tftp_help(struct sk_buff **pskb,
+static int tftp_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -46,7 +46,7 @@
int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
typeof(nf_nat_tftp_hook) nf_nat_tftp;
- tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr),
+ tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
sizeof(_tftph), &_tftph);
if (tfh == NULL)
return NF_ACCEPT;
@@ -70,7 +70,7 @@
nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
- ret = nf_nat_tftp(pskb, ctinfo, exp);
+ ret = nf_nat_tftp(skb, ctinfo, exp);
else if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0df7fff..196269c 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,7 +14,7 @@
/* core.c */
extern unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
+ struct sk_buff *skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a481a34..0cef143 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -256,14 +256,14 @@
if (verdict == NF_ACCEPT) {
afinfo = nf_get_afinfo(info->pf);
- if (!afinfo || afinfo->reroute(&skb, info) < 0)
+ if (!afinfo || afinfo->reroute(skb, info) < 0)
verdict = NF_DROP;
}
if (verdict == NF_ACCEPT) {
next_hook:
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
- &skb, info->hook,
+ skb, info->hook,
info->indev, info->outdev, &elem,
info->okfn, INT_MIN);
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49f0480..3ceeffc 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -617,6 +617,7 @@
nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
{
int diff;
+ int err;
diff = data_len - e->skb->len;
if (diff < 0) {
@@ -626,25 +627,18 @@
if (data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
+ err = pskb_expand_head(e->skb, 0,
+ diff - skb_tailroom(e->skb),
+ GFP_ATOMIC);
+ if (err) {
printk(KERN_WARNING "nf_queue: OOM "
"in mangle, dropping packet\n");
- return -ENOMEM;
+ return err;
}
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(&e->skb, data_len))
+ if (!skb_make_writable(e->skb, data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 07a1b96..77eeae6 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@
MODULE_ALIAS("ip6t_CLASSIFY");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -36,7 +36,7 @@
{
const struct xt_classify_target_info *clinfo = targinfo;
- (*pskb)->priority = clinfo->priority;
+ skb->priority = clinfo->priority;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 7043c27..8cc324b 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -34,7 +34,7 @@
#include <net/netfilter/nf_conntrack_ecache.h>
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -48,28 +48,28 @@
u_int32_t mark;
u_int32_t newmark;
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct) {
switch(markinfo->mode) {
case XT_CONNMARK_SET:
newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
if (newmark != ct->mark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, *pskb);
+ nf_conntrack_event_cache(IPCT_MARK, skb);
}
break;
case XT_CONNMARK_SAVE:
newmark = (ct->mark & ~markinfo->mask) |
- ((*pskb)->mark & markinfo->mask);
+ (skb->mark & markinfo->mask);
if (ct->mark != newmark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, *pskb);
+ nf_conntrack_event_cache(IPCT_MARK, skb);
}
break;
case XT_CONNMARK_RESTORE:
- mark = (*pskb)->mark;
+ mark = skb->mark;
diff = (ct->mark ^ mark) & markinfo->mask;
- (*pskb)->mark = mark ^ diff;
+ skb->mark = mark ^ diff;
break;
}
}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 63d7313..021b5c8 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -61,12 +61,11 @@
}
}
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- struct sk_buff *skb = *pskb;
const struct xt_connsecmark_target_info *info = targinfo;
switch (info->mode) {
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 798ab73..6322a93 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -25,7 +25,7 @@
MODULE_ALIAS("ipt_DSCP");
MODULE_ALIAS("ip6t_DSCP");
-static unsigned int target(struct sk_buff **pskb,
+static unsigned int target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -33,20 +33,20 @@
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
- ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+ ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
}
-static unsigned int target6(struct sk_buff **pskb,
+static unsigned int target6(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -54,13 +54,13 @@
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+ if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;
- ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+ ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index f30fe0b..bc6503d 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -22,7 +22,7 @@
MODULE_ALIAS("ip6t_MARK");
static unsigned int
-target_v0(struct sk_buff **pskb,
+target_v0(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -31,12 +31,12 @@
{
const struct xt_mark_target_info *markinfo = targinfo;
- (*pskb)->mark = markinfo->mark;
+ skb->mark = markinfo->mark;
return XT_CONTINUE;
}
static unsigned int
-target_v1(struct sk_buff **pskb,
+target_v1(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -52,15 +52,15 @@
break;
case XT_MARK_AND:
- mark = (*pskb)->mark & markinfo->mark;
+ mark = skb->mark & markinfo->mark;
break;
case XT_MARK_OR:
- mark = (*pskb)->mark | markinfo->mark;
+ mark = skb->mark | markinfo->mark;
break;
}
- (*pskb)->mark = mark;
+ skb->mark = mark;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index d3594c7..9fb449f 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -20,7 +20,7 @@
MODULE_ALIAS("ip6t_NFLOG");
static unsigned int
-nflog_target(struct sk_buff **pskb,
+nflog_target(struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
const void *targinfo)
@@ -33,7 +33,7 @@
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nf_log_packet(target->family, hooknum, *pskb, in, out, &li,
+ nf_log_packet(target->family, hooknum, skb, in, out, &li,
"%s", info->prefix);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 13f59f3..c3984e9 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,7 +24,7 @@
MODULE_ALIAS("arpt_NFQUEUE");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index fec1aef..4976ce1 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -12,7 +12,7 @@
MODULE_ALIAS("ip6t_NOTRACK");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -20,16 +20,16 @@
const void *targinfo)
{
/* Previously seen (loopback)? Ignore. */
- if ((*pskb)->nfct != NULL)
+ if (skb->nfct != NULL)
return XT_CONTINUE;
/* Attach fake conntrack entry.
If there is a real ct entry correspondig to this packet,
it'll hang aroun till timing out. We don't deal with it
for performance reasons. JK */
- (*pskb)->nfct = &nf_conntrack_untracked.ct_general;
- (*pskb)->nfctinfo = IP_CT_NEW;
- nf_conntrack_get((*pskb)->nfct);
+ skb->nfct = &nf_conntrack_untracked.ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index c83779a..235806e 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -28,7 +28,7 @@
static u8 mode;
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
@@ -47,7 +47,7 @@
BUG();
}
- (*pskb)->secmark = secmark;
+ skb->secmark = secmark;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index d40f7e4..07435a6 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -39,7 +39,7 @@
}
static int
-tcpmss_mangle_packet(struct sk_buff **pskb,
+tcpmss_mangle_packet(struct sk_buff *skb,
const struct xt_tcpmss_info *info,
unsigned int tcphoff,
unsigned int minlen)
@@ -50,11 +50,11 @@
u16 newmss;
u8 *opt;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return -1;
- tcplen = (*pskb)->len - tcphoff;
- tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+ tcplen = skb->len - tcphoff;
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
/* Since it passed flags test in tcp match, we know it is is
not a fragment, and has data >= tcp header length. SYN
@@ -64,19 +64,19 @@
if (tcplen != tcph->doff*4) {
if (net_ratelimit())
printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
- (*pskb)->len);
+ skb->len);
return -1;
}
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
- if (dst_mtu((*pskb)->dst) <= minlen) {
+ if (dst_mtu(skb->dst) <= minlen) {
if (net_ratelimit())
printk(KERN_ERR "xt_TCPMSS: "
"unknown or invalid path-MTU (%u)\n",
- dst_mtu((*pskb)->dst));
+ dst_mtu(skb->dst));
return -1;
}
- newmss = dst_mtu((*pskb)->dst) - minlen;
+ newmss = dst_mtu(skb->dst) - minlen;
} else
newmss = info->mss;
@@ -95,7 +95,7 @@
opt[i+2] = (newmss & 0xff00) >> 8;
opt[i+3] = newmss & 0x00ff;
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
htons(oldmss), htons(newmss), 0);
return 0;
}
@@ -104,57 +104,53 @@
/*
* MSS Option not found ?! add it..
*/
- if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
- TCPOLEN_MSS, GFP_ATOMIC);
- if (!newskb)
+ if (skb_tailroom(skb) < TCPOLEN_MSS) {
+ if (pskb_expand_head(skb, 0,
+ TCPOLEN_MSS - skb_tailroom(skb),
+ GFP_ATOMIC))
return -1;
- kfree_skb(*pskb);
- *pskb = newskb;
- tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
}
- skb_put((*pskb), TCPOLEN_MSS);
+ skb_put(skb, TCPOLEN_MSS);
opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = newmss & 0x00ff;
- nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
+ nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
oldval, ((__be16 *)tcph)[6], 0);
return TCPOLEN_MSS;
}
static unsigned int
-xt_tcpmss_target4(struct sk_buff **pskb,
+xt_tcpmss_target4(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
__be16 newlen;
int ret;
- ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4,
+ ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4,
sizeof(*iph) + sizeof(struct tcphdr));
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
newlen = htons(ntohs(iph->tot_len) + ret);
nf_csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -164,30 +160,30 @@
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
static unsigned int
-xt_tcpmss_target6(struct sk_buff **pskb,
+xt_tcpmss_target6(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u8 nexthdr;
int tcphoff;
int ret;
nexthdr = ipv6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr);
+ tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
if (tcphoff < 0) {
WARN_ON(1);
return NF_DROP;
}
- ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff,
+ ret = tcpmss_mangle_packet(skb, targinfo, tcphoff,
sizeof(*ipv6h) + sizeof(struct tcphdr));
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- ipv6h = ipv6_hdr(*pskb);
+ ipv6h = ipv6_hdr(skb);
ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 4df2ded..26c5d08 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -10,14 +10,14 @@
MODULE_ALIAS("ip6t_TRACE");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- (*pskb)->nf_trace = 1;
+ skb->nf_trace = 1;
return XT_CONTINUE;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c776bcd..98e313e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1378,6 +1378,8 @@
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
+ } else {
+ kfree(listeners);
}
netlink_table_ungrab();
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6b407ec..fa006e0 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -202,11 +202,7 @@
/* yes, we have to worry about both in and out dev
worry later - danger - this API seems to have changed
from earlier kernels */
-
- /* iptables targets take a double skb pointer in case the skb
- * needs to be replaced. We don't own the skb, so this must not
- * happen. The pskb_expand_head above should make sure of this */
- ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+ ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
ipt->tcfi_hook,
ipt->tcfi_t->u.kernel.target,
ipt->tcfi_t->data);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 2d32fd2..3f8335e 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -205,20 +205,19 @@
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
static unsigned int
-ing_hook(unsigned int hook, struct sk_buff **pskb,
+ing_hook(unsigned int hook, struct sk_buff *skb,
const struct net_device *indev,
const struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
struct Qdisc *q;
- struct sk_buff *skb = *pskb;
struct net_device *dev = skb->dev;
int fwres=NF_ACCEPT;
DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
skb->sk ? "(owned)" : "(unowned)",
- skb->dev ? (*pskb)->dev->name : "(no dev)",
+ skb->dev ? skb->dev->name : "(no dev)",
skb->len);
if (dev->qdisc_ingress) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9de3dda..eb4deaf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -954,9 +954,9 @@
.flags = SCTP_PROTOSW_FLAG,
};
-static int sctp6_rcv(struct sk_buff **pskb)
+static int sctp6_rcv(struct sk_buff *skb)
{
- return sctp_rcv(*pskb) ? -1 : 0;
+ return sctp_rcv(skb) ? -1 : 0;
}
static struct inet6_protocol sctpv6_protocol = {
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 8ebfc4d..5c69a72 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 42b3220..8bd074d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -42,7 +42,7 @@
{
u8 *ptr;
u8 pad;
- int len = buf->len;
+ size_t len = buf->len;
if (len <= buf->head[0].iov_len) {
pad = *(u8 *)(buf->head[0].iov_base + len - 1);
@@ -53,9 +53,9 @@
} else
len -= buf->head[0].iov_len;
if (len <= buf->page_len) {
- int last = (buf->page_base + len - 1)
+ unsigned int last = (buf->page_base + len - 1)
>>PAGE_CACHE_SHIFT;
- int offset = (buf->page_base + len - 1)
+ unsigned int offset = (buf->page_base + len - 1)
& (PAGE_CACHE_SIZE - 1);
ptr = kmap_atomic(buf->pages[last], KM_USER0);
pad = *(ptr + offset);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52429b1..76be83e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -127,7 +127,14 @@
struct rpc_clnt *clnt = NULL;
struct rpc_auth *auth;
int err;
- int len;
+ size_t len;
+
+ /* sanity check the name before trying to print it */
+ err = -EINVAL;
+ len = strlen(servname);
+ if (len > RPC_MAXNETNAMELEN)
+ goto out_no_rpciod;
+ len++;
dprintk("RPC: creating %s client for %s (xprt %p)\n",
program->name, servname, xprt);
@@ -148,7 +155,6 @@
clnt->cl_parent = clnt;
clnt->cl_server = clnt->cl_inline_name;
- len = strlen(servname) + 1;
if (len > sizeof(clnt->cl_inline_name)) {
char *buf = kmalloc(len, GFP_KERNEL);
if (buf != 0)
@@ -234,8 +240,8 @@
{
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
- struct rpc_xprtsock_create xprtargs = {
- .proto = args->protocol,
+ struct xprt_create xprtargs = {
+ .ident = args->protocol,
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
@@ -253,7 +259,7 @@
*/
if (args->servername == NULL) {
struct sockaddr_in *addr =
- (struct sockaddr_in *) &args->address;
+ (struct sockaddr_in *) args->address;
snprintf(servername, sizeof(servername), NIPQUAD_FMT,
NIPQUAD(addr->sin_addr.s_addr));
args->servername = servername;
@@ -269,9 +275,6 @@
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- dprintk("RPC: creating %s client for %s (xprt %p)\n",
- args->program->name, args->servername, xprt);
-
clnt = rpc_new_client(xprt, args->servername, args->program,
args->version, args->authflavor);
if (IS_ERR(clnt))
@@ -439,7 +442,7 @@
*/
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
struct rpc_program *program,
- int vers)
+ u32 vers)
{
struct rpc_clnt *clnt;
struct rpc_version *version;
@@ -843,8 +846,7 @@
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
if (RPC_IS_ASYNC(task) || !signalled()) {
- xprt_release(task);
- task->tk_action = call_reserve;
+ task->tk_action = call_allocate;
rpc_delay(task, HZ>>4);
return;
}
@@ -871,6 +873,7 @@
buf->head[0].iov_len = len;
buf->tail[0].iov_len = 0;
buf->page_len = 0;
+ buf->flags = 0;
buf->len = 0;
buf->buflen = len;
}
@@ -937,7 +940,7 @@
static void
call_bind_status(struct rpc_task *task)
{
- int status = -EACCES;
+ int status = -EIO;
if (task->tk_status >= 0) {
dprint_status(task);
@@ -947,9 +950,20 @@
}
switch (task->tk_status) {
+ case -EAGAIN:
+ dprintk("RPC: %5u rpcbind waiting for another request "
+ "to finish\n", task->tk_pid);
+ /* avoid busy-waiting here -- could be a network outage. */
+ rpc_delay(task, 5*HZ);
+ goto retry_timeout;
case -EACCES:
dprintk("RPC: %5u remote rpcbind: RPC program/version "
"unavailable\n", task->tk_pid);
+ /* fail immediately if this is an RPC ping */
+ if (task->tk_msg.rpc_proc->p_proc == 0) {
+ status = -EOPNOTSUPP;
+ break;
+ }
rpc_delay(task, 3*HZ);
goto retry_timeout;
case -ETIMEDOUT:
@@ -957,6 +971,7 @@
task->tk_pid);
goto retry_timeout;
case -EPFNOSUPPORT:
+ /* server doesn't support any rpcbind version we know of */
dprintk("RPC: %5u remote rpcbind service unavailable\n",
task->tk_pid);
break;
@@ -969,7 +984,6 @@
default:
dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
task->tk_pid, -task->tk_status);
- status = -EIO;
}
rpc_exit(task, status);
@@ -1257,7 +1271,6 @@
{
dprint_status(task);
- xprt_release(task); /* Must do to obtain new XID */
task->tk_action = call_refreshresult;
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
@@ -1375,6 +1388,8 @@
dprintk("RPC: %5u %s: retry stale creds\n",
task->tk_pid, __FUNCTION__);
rpcauth_invalcred(task);
+ /* Ensure we obtain a new XID! */
+ xprt_release(task);
task->tk_action = call_refresh;
goto out_retry;
case RPC_AUTH_BADCRED:
@@ -1523,13 +1538,18 @@
spin_lock(&clnt->cl_lock);
list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
const char *rpc_waitq = "none";
+ int proc;
+
+ if (t->tk_msg.rpc_proc)
+ proc = t->tk_msg.rpc_proc->p_proc;
+ else
+ proc = -1;
if (RPC_IS_QUEUED(t))
rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
- t->tk_pid,
- (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+ t->tk_pid, proc,
t->tk_flags, t->tk_status,
t->tk_client,
(t->tk_client ? t->tk_client->cl_prog : 0),
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 669e12a..c8433e8 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -14,7 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/namei.h>
-#include <linux/dnotify.h>
+#include <linux/fsnotify.h>
#include <linux/kernel.h>
#include <asm/ioctls.h>
@@ -329,6 +329,7 @@
clnt->cl_prog, clnt->cl_vers);
seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
+ seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
return 0;
}
@@ -585,6 +586,7 @@
if (S_ISDIR(mode))
inc_nlink(dir);
d_add(dentry, inode);
+ fsnotify_create(dir, dentry);
}
mutex_unlock(&dir->i_mutex);
return 0;
@@ -606,7 +608,7 @@
inode->i_ino = iunique(dir->i_sb, 100);
d_instantiate(dentry, inode);
inc_nlink(dir);
- inode_dir_notify(dir, DN_CREATE);
+ fsnotify_mkdir(dir, dentry);
return 0;
out_err:
printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -748,7 +750,7 @@
rpci->flags = flags;
rpci->ops = ops;
rpci->nkern_readwriters = 1;
- inode_dir_notify(dir, DN_CREATE);
+ fsnotify_create(dir, dentry);
dget(dentry);
out:
mutex_unlock(&dir->i_mutex);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d1740db..a05493a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,11 +16,14 @@
#include <linux/types.h>
#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_BIND
@@ -91,26 +94,6 @@
#define RPCB_MAXADDRLEN (128u)
/*
- * r_netid
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP
- * over IPv4 the value of r_netid is the string "udp".
- *
- * ...
- *
- * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP
- * over IPv6 the value of r_netid is the string "udp6".
- */
-#define RPCB_NETID_UDP "\165\144\160" /* "udp" */
-#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */
-#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */
-#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */
-
-#define RPCB_MAXNETIDLEN (4u)
-
-/*
* r_owner
*
* The "owner" is allowed to unset a service in the rpcbind database.
@@ -120,7 +103,7 @@
#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
static void rpcb_getport_done(struct rpc_task *, void *);
-extern struct rpc_program rpcb_program;
+static struct rpc_program rpcb_program;
struct rpcbind_args {
struct rpc_xprt * r_xprt;
@@ -137,10 +120,13 @@
static struct rpc_procinfo rpcb_procedures2[];
static struct rpc_procinfo rpcb_procedures3[];
-static struct rpcb_info {
+struct rpcb_info {
int rpc_vers;
struct rpc_procinfo * rpc_proc;
-} rpcb_next_version[];
+};
+
+static struct rpcb_info rpcb_next_version[];
+static struct rpcb_info rpcb_next_version6[];
static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
{
@@ -190,7 +176,17 @@
RPC_CLNT_CREATE_INTR),
};
- ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+ switch (srvaddr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
+ break;
+ default:
+ return NULL;
+ }
+
if (!privileged)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
return rpc_create(&args);
@@ -234,7 +230,7 @@
prog, vers, prot, port);
rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
- IPPROTO_UDP, 2, 1);
+ XPRT_TRANSPORT_UDP, 2, 1);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
@@ -316,6 +312,7 @@
struct rpc_task *child;
struct sockaddr addr;
int status;
+ struct rpcb_info *info;
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __FUNCTION__,
@@ -325,7 +322,7 @@
BUG_ON(clnt->cl_parent != clnt);
if (xprt_test_and_set_binding(xprt)) {
- status = -EACCES; /* tell caller to check again */
+ status = -EAGAIN; /* tell caller to check again */
dprintk("RPC: %5u %s: waiting for another binder\n",
task->tk_pid, __FUNCTION__);
goto bailout_nowake;
@@ -343,18 +340,43 @@
goto bailout_nofree;
}
- if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+ rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+
+ /* Don't ever use rpcbind v2 for AF_INET6 requests */
+ switch (addr.sa_family) {
+ case AF_INET:
+ info = rpcb_next_version;
+ break;
+ case AF_INET6:
+ info = rpcb_next_version6;
+ break;
+ default:
+ status = -EAFNOSUPPORT;
+ dprintk("RPC: %5u %s: bad address family\n",
+ task->tk_pid, __FUNCTION__);
+ goto bailout_nofree;
+ }
+ if (info[xprt->bind_index].rpc_proc == NULL) {
xprt->bind_index = 0;
- status = -EACCES; /* tell caller to try again later */
+ status = -EPFNOSUPPORT;
dprintk("RPC: %5u %s: no more getport versions available\n",
task->tk_pid, __FUNCTION__);
goto bailout_nofree;
}
- bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+ bind_version = info[xprt->bind_index].rpc_vers;
dprintk("RPC: %5u %s: trying rpcbind version %u\n",
task->tk_pid, __FUNCTION__, bind_version);
+ rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+ bind_version, 0);
+ if (IS_ERR(rpcb_clnt)) {
+ status = PTR_ERR(rpcb_clnt);
+ dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
+ task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
+ goto bailout_nofree;
+ }
+
map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
if (!map) {
status = -ENOMEM;
@@ -367,28 +389,19 @@
map->r_prot = xprt->prot;
map->r_port = 0;
map->r_xprt = xprt_get(xprt);
- map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
- RPCB_NETID_UDP;
- memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
- sizeof(map->r_addr));
+ map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+ memcpy(map->r_addr,
+ rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
+ sizeof(map->r_addr));
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
- rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
- rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
- if (IS_ERR(rpcb_clnt)) {
- status = PTR_ERR(rpcb_clnt);
- dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
- task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
- goto bailout;
- }
-
child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
status = -EIO;
dprintk("RPC: %5u %s: rpc_run_task failed\n",
task->tk_pid, __FUNCTION__);
- goto bailout_nofree;
+ goto bailout;
}
rpc_put_task(child);
@@ -403,6 +416,7 @@
bailout_nowake:
task->tk_status = status;
}
+EXPORT_SYMBOL_GPL(rpcb_getport_async);
/*
* Rpcbind child task calls this callback via tk_exit.
@@ -413,6 +427,10 @@
struct rpc_xprt *xprt = map->r_xprt;
int status = child->tk_status;
+ /* Garbage reply: retry with a lesser rpcbind version */
+ if (status == -EIO)
+ status = -EPROTONOSUPPORT;
+
/* rpcbind server doesn't support this rpcbind protocol version */
if (status == -EPROTONOSUPPORT)
xprt->bind_index++;
@@ -490,16 +508,24 @@
unsigned short *portp)
{
char *addr;
- int addr_len, c, i, f, first, val;
+ u32 addr_len;
+ int c, i, f, first, val;
*portp = 0;
- addr_len = (unsigned int) ntohl(*p++);
- if (addr_len > RPCB_MAXADDRLEN) /* sanity */
- return -EINVAL;
+ addr_len = ntohl(*p++);
- dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n",
- (char *) p);
+ /*
+ * Simple sanity check. The smallest possible universal
+ * address is an IPv4 address string containing 11 bytes.
+ */
+ if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+ goto out_err;
+ /*
+ * Start at the end and walk backwards until the first dot
+ * is encountered. When the second dot is found, we have
+ * both parts of the port number.
+ */
addr = (char *)p;
val = 0;
first = 1;
@@ -521,8 +547,19 @@
}
}
+ /*
+ * Simple sanity check. If we never saw a dot in the reply,
+ * then this was probably just garbage.
+ */
+ if (first)
+ goto out_err;
+
dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp);
return 0;
+
+out_err:
+ dprintk("RPC: rpcbind server returned malformed reply\n");
+ return -EIO;
}
#define RPCB_program_sz (1u)
@@ -531,7 +568,7 @@
#define RPCB_port_sz (1u)
#define RPCB_boolean_sz (1u)
-#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
+#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
@@ -593,6 +630,14 @@
{ 0, NULL },
};
+static struct rpcb_info rpcb_next_version6[] = {
+#ifdef CONFIG_SUNRPC_BIND34
+ { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
+ { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+#endif
+ { 0, NULL },
+};
+
static struct rpc_version rpcb_version2 = {
.number = 2,
.nrprocs = RPCB_HIGHPROC_2,
@@ -621,7 +666,7 @@
static struct rpc_stat rpcb_stats;
-struct rpc_program rpcb_program = {
+static struct rpc_program rpcb_program = {
.name = "rpcbind",
.number = RPCBIND_PROGRAM,
.nrvers = ARRAY_SIZE(rpcb_version),
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 954d7ec..3c773c5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -777,6 +777,7 @@
task->tk_pid, size, buf);
return &buf->data;
}
+EXPORT_SYMBOL_GPL(rpc_malloc);
/**
* rpc_free - free buffer allocated via rpc_malloc
@@ -802,6 +803,7 @@
else
kfree(buf);
}
+EXPORT_SYMBOL_GPL(rpc_free);
/*
* Creation and deletion of RPC task structures
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1d377d1..97ac45f0 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -34,6 +34,7 @@
desc->offset += len;
return len;
}
+EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -137,6 +138,7 @@
out:
return copied;
}
+EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
/**
* csum_partial_copy_to_xdr - checksum and copy data
@@ -179,3 +181,4 @@
return -1;
return 0;
}
+EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 384c4ad..33d89e8 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -20,7 +20,7 @@
#include <linux/sunrpc/auth.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-
+#include <linux/sunrpc/xprtsock.h>
/* RPC scheduler */
EXPORT_SYMBOL(rpc_execute);
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index 8142fdb..31becbf 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/unistd.h>
+#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
@@ -40,6 +41,7 @@
rt->ntimeouts[i] = 0;
}
}
+EXPORT_SYMBOL_GPL(rpc_init_rtt);
/*
* NB: When computing the smoothed RTT and standard deviation,
@@ -75,6 +77,7 @@
if (*sdrtt < RPC_RTO_MIN)
*sdrtt = RPC_RTO_MIN;
}
+EXPORT_SYMBOL_GPL(rpc_update_rtt);
/*
* Estimate rto for an nfs rpc sent via. an unreliable datagram.
@@ -103,3 +106,4 @@
return res;
}
+EXPORT_SYMBOL_GPL(rpc_calc_rto);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c8c2edc..282a9a2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,9 @@
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(xprt_list);
+
/*
* The transport code maintains an estimate on the maximum number of out-
* standing RPC requests, using a smoothed version of the congestion
@@ -81,6 +84,78 @@
#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
/**
+ * xprt_register_transport - register a transport implementation
+ * @transport: transport to register
+ *
+ * If a transport implementation is loaded as a kernel module, it can
+ * call this interface to make itself known to the RPC client.
+ *
+ * Returns:
+ * 0: transport successfully registered
+ * -EEXIST: transport already registered
+ * -EINVAL: transport module being unloaded
+ */
+int xprt_register_transport(struct xprt_class *transport)
+{
+ struct xprt_class *t;
+ int result;
+
+ result = -EEXIST;
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ /* don't register the same transport class twice */
+ if (t->ident == transport->ident)
+ goto out;
+ }
+
+ result = -EINVAL;
+ if (try_module_get(THIS_MODULE)) {
+ list_add_tail(&transport->list, &xprt_list);
+ printk(KERN_INFO "RPC: Registered %s transport module.\n",
+ transport->name);
+ result = 0;
+ }
+
+out:
+ spin_unlock(&xprt_list_lock);
+ return result;
+}
+EXPORT_SYMBOL_GPL(xprt_register_transport);
+
+/**
+ * xprt_unregister_transport - unregister a transport implementation
+ * transport: transport to unregister
+ *
+ * Returns:
+ * 0: transport successfully unregistered
+ * -ENOENT: transport never registered
+ */
+int xprt_unregister_transport(struct xprt_class *transport)
+{
+ struct xprt_class *t;
+ int result;
+
+ result = 0;
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ if (t == transport) {
+ printk(KERN_INFO
+ "RPC: Unregistered %s transport module.\n",
+ transport->name);
+ list_del_init(&transport->list);
+ module_put(THIS_MODULE);
+ goto out;
+ }
+ }
+ result = -ENOENT;
+
+out:
+ spin_unlock(&xprt_list_lock);
+ return result;
+}
+EXPORT_SYMBOL_GPL(xprt_unregister_transport);
+
+/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
*
@@ -118,6 +193,7 @@
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
return 0;
}
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
@@ -167,6 +243,7 @@
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
return 0;
}
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -246,6 +323,7 @@
__xprt_lock_write_next(xprt);
}
}
+EXPORT_SYMBOL_GPL(xprt_release_xprt);
/**
* xprt_release_xprt_cong - allow other requests to use a transport
@@ -262,6 +340,7 @@
__xprt_lock_write_next_cong(xprt);
}
}
+EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -314,6 +393,7 @@
{
__xprt_put_cong(task->tk_xprt, task->tk_rqstp);
}
+EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
/**
* xprt_adjust_cwnd - adjust transport congestion window
@@ -345,6 +425,7 @@
xprt->cwnd = cwnd;
__xprt_put_cong(xprt, req);
}
+EXPORT_SYMBOL_GPL(xprt_adjust_cwnd);
/**
* xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
@@ -359,6 +440,7 @@
else
rpc_wake_up(&xprt->pending);
}
+EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
/**
* xprt_wait_for_buffer_space - wait for transport output buffer to clear
@@ -373,6 +455,7 @@
task->tk_timeout = req->rq_timeout;
rpc_sleep_on(&xprt->pending, task, NULL, NULL);
}
+EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
/**
* xprt_write_space - wake the task waiting for transport output buffer space
@@ -393,6 +476,7 @@
}
spin_unlock_bh(&xprt->transport_lock);
}
+EXPORT_SYMBOL_GPL(xprt_write_space);
/**
* xprt_set_retrans_timeout_def - set a request's retransmit timeout
@@ -406,6 +490,7 @@
{
task->tk_timeout = task->tk_rqstp->rq_timeout;
}
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
/*
* xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
@@ -425,6 +510,7 @@
if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
task->tk_timeout = max_timeout;
}
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
static void xprt_reset_majortimeo(struct rpc_rqst *req)
{
@@ -500,6 +586,7 @@
xprt_wake_pending_tasks(xprt, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock);
}
+EXPORT_SYMBOL_GPL(xprt_disconnect);
static void
xprt_init_autodisconnect(unsigned long data)
@@ -610,6 +697,7 @@
xprt->stat.bad_xids++;
return NULL;
}
+EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
/**
* xprt_update_rtt - update an RPC client's RTT state after receiving a reply
@@ -629,6 +717,7 @@
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
}
}
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
/**
* xprt_complete_rqst - called when reply processing is complete
@@ -653,6 +742,7 @@
req->rq_received = req->rq_private_buf.len = copied;
rpc_wake_up_task(task);
}
+EXPORT_SYMBOL_GPL(xprt_complete_rqst);
static void xprt_timer(struct rpc_task *task)
{
@@ -889,23 +979,25 @@
* @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
struct rpc_xprt *xprt;
struct rpc_rqst *req;
+ struct xprt_class *t;
- switch (args->proto) {
- case IPPROTO_UDP:
- xprt = xs_setup_udp(args);
- break;
- case IPPROTO_TCP:
- xprt = xs_setup_tcp(args);
- break;
- default:
- printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
- args->proto);
- return ERR_PTR(-EIO);
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ if (t->ident == args->ident) {
+ spin_unlock(&xprt_list_lock);
+ goto found;
+ }
}
+ spin_unlock(&xprt_list_lock);
+ printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+ return ERR_PTR(-EIO);
+
+found:
+ xprt = t->setup(args);
if (IS_ERR(xprt)) {
dprintk("RPC: xprt_create_transport: failed, %ld\n",
-PTR_ERR(xprt));
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
new file mode 100644
index 0000000..264f0fe
--- /dev/null
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
+
+xprtrdma-y := transport.o rpc_rdma.o verbs.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
new file mode 100644
index 0000000..12db635
--- /dev/null
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * rpc_rdma.c
+ *
+ * This file contains the guts of the RPC RDMA protocol, and
+ * does marshaling/unmarshaling, etc. It is also where interfacing
+ * to the Linux RPC framework lives.
+ */
+
+#include "xprt_rdma.h"
+
+#include <linux/highmem.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+enum rpcrdma_chunktype {
+ rpcrdma_noch = 0,
+ rpcrdma_readch,
+ rpcrdma_areadch,
+ rpcrdma_writech,
+ rpcrdma_replych
+};
+
+#ifdef RPC_DEBUG
+static const char transfertypes[][12] = {
+ "pure inline", /* no chunks */
+ " read chunk", /* some argument via rdma read */
+ "*read chunk", /* entire request via rdma read */
+ "write chunk", /* some result via rdma write */
+ "reply chunk" /* entire reply via rdma write */
+};
+#endif
+
+/*
+ * Chunk assembly from upper layer xdr_buf.
+ *
+ * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
+ * elements. Segments are then coalesced when registered, if possible
+ * within the selected memreg mode.
+ *
+ * Note, this routine is never called if the connection's memory
+ * registration strategy is 0 (bounce buffers).
+ */
+
+static int
+rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
+ enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
+{
+ int len, n = 0, p;
+
+ if (pos == 0 && xdrbuf->head[0].iov_len) {
+ seg[n].mr_page = NULL;
+ seg[n].mr_offset = xdrbuf->head[0].iov_base;
+ seg[n].mr_len = xdrbuf->head[0].iov_len;
+ pos += xdrbuf->head[0].iov_len;
+ ++n;
+ }
+
+ if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
+ if (n == nsegs)
+ return 0;
+ seg[n].mr_page = xdrbuf->pages[0];
+ seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
+ seg[n].mr_len = min_t(u32,
+ PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
+ len = xdrbuf->page_len - seg[n].mr_len;
+ pos += len;
+ ++n;
+ p = 1;
+ while (len > 0) {
+ if (n == nsegs)
+ return 0;
+ seg[n].mr_page = xdrbuf->pages[p];
+ seg[n].mr_offset = NULL;
+ seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
+ len -= seg[n].mr_len;
+ ++n;
+ ++p;
+ }
+ }
+
+ if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) {
+ if (n == nsegs)
+ return 0;
+ seg[n].mr_page = NULL;
+ seg[n].mr_offset = xdrbuf->tail[0].iov_base;
+ seg[n].mr_len = xdrbuf->tail[0].iov_len;
+ pos += xdrbuf->tail[0].iov_len;
+ ++n;
+ }
+
+ if (pos < xdrbuf->len)
+ dprintk("RPC: %s: marshaled only %d of %d\n",
+ __func__, pos, xdrbuf->len);
+
+ return n;
+}
+
+/*
+ * Create read/write chunk lists, and reply chunks, for RDMA
+ *
+ * Assume check against THRESHOLD has been done, and chunks are required.
+ * Assume only encoding one list entry for read|write chunks. The NFSv3
+ * protocol is simple enough to allow this as it only has a single "bulk
+ * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
+ * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
+ *
+ * When used for a single reply chunk (which is a special write
+ * chunk used for the entire reply, rather than just the data), it
+ * is used primarily for READDIR and READLINK which would otherwise
+ * be severely size-limited by a small rdma inline read max. The server
+ * response will come back as an RDMA Write, followed by a message
+ * of type RDMA_NOMSG carrying the xid and length. As a result, reply
+ * chunks do not provide data alignment, however they do not require
+ * "fixup" (moving the response to the upper layer buffer) either.
+ *
+ * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
+ *
+ * Read chunklist (a linked list):
+ * N elements, position P (same P for all chunks of same arg!):
+ * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
+ *
+ * Write chunklist (a list of (one) counted array):
+ * N elements:
+ * 1 - N - HLOO - HLOO - ... - HLOO - 0
+ *
+ * Reply chunk (a counted array):
+ * N elements:
+ * 1 - N - HLOO - HLOO - ... - HLOO
+ */
+
+static unsigned int
+rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
+ struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
+{
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
+ int nsegs, nchunks = 0;
+ int pos;
+ struct rpcrdma_mr_seg *seg = req->rl_segments;
+ struct rpcrdma_read_chunk *cur_rchunk = NULL;
+ struct rpcrdma_write_array *warray = NULL;
+ struct rpcrdma_write_chunk *cur_wchunk = NULL;
+ u32 *iptr = headerp->rm_body.rm_chunks;
+
+ if (type == rpcrdma_readch || type == rpcrdma_areadch) {
+ /* a read chunk - server will RDMA Read our memory */
+ cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
+ } else {
+ /* a write or reply chunk - server will RDMA Write our memory */
+ *iptr++ = xdr_zero; /* encode a NULL read chunk list */
+ if (type == rpcrdma_replych)
+ *iptr++ = xdr_zero; /* a NULL write chunk list */
+ warray = (struct rpcrdma_write_array *) iptr;
+ cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
+ }
+
+ if (type == rpcrdma_replych || type == rpcrdma_areadch)
+ pos = 0;
+ else
+ pos = target->head[0].iov_len;
+
+ nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
+ if (nsegs == 0)
+ return 0;
+
+ do {
+ /* bind/register the memory, then build chunk from result. */
+ int n = rpcrdma_register_external(seg, nsegs,
+ cur_wchunk != NULL, r_xprt);
+ if (n <= 0)
+ goto out;
+ if (cur_rchunk) { /* read */
+ cur_rchunk->rc_discrim = xdr_one;
+ /* all read chunks have the same "position" */
+ cur_rchunk->rc_position = htonl(pos);
+ cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
+ cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
+ xdr_encode_hyper(
+ (u32 *)&cur_rchunk->rc_target.rs_offset,
+ seg->mr_base);
+ dprintk("RPC: %s: read chunk "
+ "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__,
+ seg->mr_len, seg->mr_base, seg->mr_rkey, pos,
+ n < nsegs ? "more" : "last");
+ cur_rchunk++;
+ r_xprt->rx_stats.read_chunk_count++;
+ } else { /* write/reply */
+ cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
+ cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
+ xdr_encode_hyper(
+ (u32 *)&cur_wchunk->wc_target.rs_offset,
+ seg->mr_base);
+ dprintk("RPC: %s: %s chunk "
+ "elem %d@0x%llx:0x%x (%s)\n", __func__,
+ (type == rpcrdma_replych) ? "reply" : "write",
+ seg->mr_len, seg->mr_base, seg->mr_rkey,
+ n < nsegs ? "more" : "last");
+ cur_wchunk++;
+ if (type == rpcrdma_replych)
+ r_xprt->rx_stats.reply_chunk_count++;
+ else
+ r_xprt->rx_stats.write_chunk_count++;
+ r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ }
+ nchunks++;
+ seg += n;
+ nsegs -= n;
+ } while (nsegs);
+
+ /* success. all failures return above */
+ req->rl_nchunks = nchunks;
+
+ BUG_ON(nchunks == 0);
+
+ /*
+ * finish off header. If write, marshal discrim and nchunks.
+ */
+ if (cur_rchunk) {
+ iptr = (u32 *) cur_rchunk;
+ *iptr++ = xdr_zero; /* finish the read chunk list */
+ *iptr++ = xdr_zero; /* encode a NULL write chunk list */
+ *iptr++ = xdr_zero; /* encode a NULL reply chunk */
+ } else {
+ warray->wc_discrim = xdr_one;
+ warray->wc_nchunks = htonl(nchunks);
+ iptr = (u32 *) cur_wchunk;
+ if (type == rpcrdma_writech) {
+ *iptr++ = xdr_zero; /* finish the write chunk list */
+ *iptr++ = xdr_zero; /* encode a NULL reply chunk */
+ }
+ }
+
+ /*
+ * Return header size.
+ */
+ return (unsigned char *)iptr - (unsigned char *)headerp;
+
+out:
+ for (pos = 0; nchunks--;)
+ pos += rpcrdma_deregister_external(
+ &req->rl_segments[pos], r_xprt, NULL);
+ return 0;
+}
+
+/*
+ * Copy write data inline.
+ * This function is used for "small" requests. Data which is passed
+ * to RPC via iovecs (or page list) is copied directly into the
+ * pre-registered memory buffer for this request. For small amounts
+ * of data, this is efficient. The cutoff value is tunable.
+ */
+static int
+rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
+{
+ int i, npages, curlen;
+ int copy_len;
+ unsigned char *srcp, *destp;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+
+ destp = rqst->rq_svec[0].iov_base;
+ curlen = rqst->rq_svec[0].iov_len;
+ destp += curlen;
+ /*
+ * Do optional padding where it makes sense. Alignment of write
+ * payload can help the server, if our setting is accurate.
+ */
+ pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
+ if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
+ pad = 0; /* don't pad this request */
+
+ dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n",
+ __func__, pad, destp, rqst->rq_slen, curlen);
+
+ copy_len = rqst->rq_snd_buf.page_len;
+ r_xprt->rx_stats.pullup_copy_count += copy_len;
+ npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
+ for (i = 0; copy_len && i < npages; i++) {
+ if (i == 0)
+ curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
+ else
+ curlen = PAGE_SIZE;
+ if (curlen > copy_len)
+ curlen = copy_len;
+ dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
+ __func__, i, destp, copy_len, curlen);
+ srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
+ KM_SKB_SUNRPC_DATA);
+ if (i == 0)
+ memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
+ else
+ memcpy(destp, srcp, curlen);
+ kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
+ rqst->rq_svec[0].iov_len += curlen;
+ destp += curlen;
+ copy_len -= curlen;
+ }
+ if (rqst->rq_snd_buf.tail[0].iov_len) {
+ curlen = rqst->rq_snd_buf.tail[0].iov_len;
+ if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
+ memcpy(destp,
+ rqst->rq_snd_buf.tail[0].iov_base, curlen);
+ r_xprt->rx_stats.pullup_copy_count += curlen;
+ }
+ dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
+ __func__, destp, copy_len, curlen);
+ rqst->rq_svec[0].iov_len += curlen;
+ }
+ /* header now contains entire send message */
+ return pad;
+}
+
+/*
+ * Marshal a request: the primary job of this routine is to choose
+ * the transfer modes. See comments below.
+ *
+ * Uses multiple RDMA IOVs for a request:
+ * [0] -- RPC RDMA header, which uses memory from the *start* of the
+ * preregistered buffer that already holds the RPC data in
+ * its middle.
+ * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
+ * [2] -- optional padding.
+ * [3] -- if padded, header only in [1] and data here.
+ */
+
+int
+rpcrdma_marshal_req(struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_task->tk_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ char *base;
+ size_t hdrlen, rpclen, padlen;
+ enum rpcrdma_chunktype rtype, wtype;
+ struct rpcrdma_msg *headerp;
+
+ /*
+ * rpclen gets amount of data in first buffer, which is the
+ * pre-registered buffer.
+ */
+ base = rqst->rq_svec[0].iov_base;
+ rpclen = rqst->rq_svec[0].iov_len;
+
+ /* build RDMA header in private area at front */
+ headerp = (struct rpcrdma_msg *) req->rl_base;
+ /* don't htonl XID, it's already done in request */
+ headerp->rm_xid = rqst->rq_xid;
+ headerp->rm_vers = xdr_one;
+ headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
+ headerp->rm_type = __constant_htonl(RDMA_MSG);
+
+ /*
+ * Chunks needed for results?
+ *
+ * o If the expected result is under the inline threshold, all ops
+ * return as inline (but see later).
+ * o Large non-read ops return as a single reply chunk.
+ * o Large read ops return data as write chunk(s), header as inline.
+ *
+ * Note: the NFS code sending down multiple result segments implies
+ * the op is one of read, readdir[plus], readlink or NFSv4 getacl.
+ */
+
+ /*
+ * This code can handle read chunks, write chunks OR reply
+ * chunks -- only one type. If the request is too big to fit
+ * inline, then we will choose read chunks. If the request is
+ * a READ, then use write chunks to separate the file data
+ * into pages; otherwise use reply chunks.
+ */
+ if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
+ wtype = rpcrdma_noch;
+ else if (rqst->rq_rcv_buf.page_len == 0)
+ wtype = rpcrdma_replych;
+ else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
+ wtype = rpcrdma_writech;
+ else
+ wtype = rpcrdma_replych;
+
+ /*
+ * Chunks needed for arguments?
+ *
+ * o If the total request is under the inline threshold, all ops
+ * are sent as inline.
+ * o Large non-write ops are sent with the entire message as a
+ * single read chunk (protocol 0-position special case).
+ * o Large write ops transmit data as read chunk(s), header as
+ * inline.
+ *
+ * Note: the NFS code sending down multiple argument segments
+ * implies the op is a write.
+ * TBD check NFSv4 setacl
+ */
+ if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
+ rtype = rpcrdma_noch;
+ else if (rqst->rq_snd_buf.page_len == 0)
+ rtype = rpcrdma_areadch;
+ else
+ rtype = rpcrdma_readch;
+
+ /* The following simplification is not true forever */
+ if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+ wtype = rpcrdma_noch;
+ BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
+
+ if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
+ (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
+ /* forced to "pure inline"? */
+ dprintk("RPC: %s: too much data (%d/%d) for inline\n",
+ __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
+ return -1;
+ }
+
+ hdrlen = 28; /*sizeof *headerp;*/
+ padlen = 0;
+
+ /*
+ * Pull up any extra send data into the preregistered buffer.
+ * When padding is in use and applies to the transfer, insert
+ * it and change the message type.
+ */
+ if (rtype == rpcrdma_noch) {
+
+ padlen = rpcrdma_inline_pullup(rqst,
+ RPCRDMA_INLINE_PAD_VALUE(rqst));
+
+ if (padlen) {
+ headerp->rm_type = __constant_htonl(RDMA_MSGP);
+ headerp->rm_body.rm_padded.rm_align =
+ htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
+ headerp->rm_body.rm_padded.rm_thresh =
+ __constant_htonl(RPCRDMA_INLINE_PAD_THRESH);
+ headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
+ headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
+ headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
+ hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
+ BUG_ON(wtype != rpcrdma_noch);
+
+ } else {
+ headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
+ headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
+ headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
+ /* new length after pullup */
+ rpclen = rqst->rq_svec[0].iov_len;
+ /*
+ * Currently we try to not actually use read inline.
+ * Reply chunks have the desirable property that
+ * they land, packed, directly in the target buffers
+ * without headers, so they require no fixup. The
+ * additional RDMA Write op sends the same amount
+ * of data, streams on-the-wire and adds no overhead
+ * on receive. Therefore, we request a reply chunk
+ * for non-writes wherever feasible and efficient.
+ */
+ if (wtype == rpcrdma_noch &&
+ r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+ wtype = rpcrdma_replych;
+ }
+ }
+
+ /*
+ * Marshal chunks. This routine will return the header length
+ * consumed by marshaling.
+ */
+ if (rtype != rpcrdma_noch) {
+ hdrlen = rpcrdma_create_chunks(rqst,
+ &rqst->rq_snd_buf, headerp, rtype);
+ wtype = rtype; /* simplify dprintk */
+
+ } else if (wtype != rpcrdma_noch) {
+ hdrlen = rpcrdma_create_chunks(rqst,
+ &rqst->rq_rcv_buf, headerp, wtype);
+ }
+
+ if (hdrlen == 0)
+ return -1;
+
+ dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
+ " headerp 0x%p base 0x%p lkey 0x%x\n",
+ __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
+ headerp, base, req->rl_iov.lkey);
+
+ /*
+ * initialize send_iov's - normally only two: rdma chunk header and
+ * single preregistered RPC header buffer, but if padding is present,
+ * then use a preregistered (and zeroed) pad buffer between the RPC
+ * header and any write data. In all non-rdma cases, any following
+ * data has been copied into the RPC header buffer.
+ */
+ req->rl_send_iov[0].addr = req->rl_iov.addr;
+ req->rl_send_iov[0].length = hdrlen;
+ req->rl_send_iov[0].lkey = req->rl_iov.lkey;
+
+ req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
+ req->rl_send_iov[1].length = rpclen;
+ req->rl_send_iov[1].lkey = req->rl_iov.lkey;
+
+ req->rl_niovs = 2;
+
+ if (padlen) {
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+
+ req->rl_send_iov[2].addr = ep->rep_pad.addr;
+ req->rl_send_iov[2].length = padlen;
+ req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
+
+ req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
+ req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
+ req->rl_send_iov[3].lkey = req->rl_iov.lkey;
+
+ req->rl_niovs = 4;
+ }
+
+ return 0;
+}
+
+/*
+ * Chase down a received write or reply chunklist to get length
+ * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
+ */
+static int
+rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
+{
+ unsigned int i, total_len;
+ struct rpcrdma_write_chunk *cur_wchunk;
+
+ i = ntohl(**iptrp); /* get array count */
+ if (i > max)
+ return -1;
+ cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
+ total_len = 0;
+ while (i--) {
+ struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
+ ifdebug(FACILITY) {
+ u64 off;
+ xdr_decode_hyper((u32 *)&seg->rs_offset, &off);
+ dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
+ __func__,
+ ntohl(seg->rs_length),
+ off,
+ ntohl(seg->rs_handle));
+ }
+ total_len += ntohl(seg->rs_length);
+ ++cur_wchunk;
+ }
+ /* check and adjust for properly terminated write chunk */
+ if (wrchunk) {
+ u32 *w = (u32 *) cur_wchunk;
+ if (*w++ != xdr_zero)
+ return -1;
+ cur_wchunk = (struct rpcrdma_write_chunk *) w;
+ }
+ if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
+ return -1;
+
+ *iptrp = (u32 *) cur_wchunk;
+ return total_len;
+}
+
+/*
+ * Scatter inline received data back into provided iov's.
+ */
+static void
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+{
+ int i, npages, curlen, olen;
+ char *destp;
+
+ curlen = rqst->rq_rcv_buf.head[0].iov_len;
+ if (curlen > copy_len) { /* write chunk header fixup */
+ curlen = copy_len;
+ rqst->rq_rcv_buf.head[0].iov_len = curlen;
+ }
+
+ dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
+ __func__, srcp, copy_len, curlen);
+
+ /* Shift pointer for first receive segment only */
+ rqst->rq_rcv_buf.head[0].iov_base = srcp;
+ srcp += curlen;
+ copy_len -= curlen;
+
+ olen = copy_len;
+ i = 0;
+ rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
+ if (copy_len && rqst->rq_rcv_buf.page_len) {
+ npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
+ rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
+ for (; i < npages; i++) {
+ if (i == 0)
+ curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
+ else
+ curlen = PAGE_SIZE;
+ if (curlen > copy_len)
+ curlen = copy_len;
+ dprintk("RPC: %s: page %d"
+ " srcp 0x%p len %d curlen %d\n",
+ __func__, i, srcp, copy_len, curlen);
+ destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
+ KM_SKB_SUNRPC_DATA);
+ if (i == 0)
+ memcpy(destp + rqst->rq_rcv_buf.page_base,
+ srcp, curlen);
+ else
+ memcpy(destp, srcp, curlen);
+ flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
+ kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
+ srcp += curlen;
+ copy_len -= curlen;
+ if (copy_len == 0)
+ break;
+ }
+ rqst->rq_rcv_buf.page_len = olen - copy_len;
+ } else
+ rqst->rq_rcv_buf.page_len = 0;
+
+ if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
+ curlen = copy_len;
+ if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
+ curlen = rqst->rq_rcv_buf.tail[0].iov_len;
+ if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
+ memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+ dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
+ __func__, srcp, copy_len, curlen);
+ rqst->rq_rcv_buf.tail[0].iov_len = curlen;
+ copy_len -= curlen; ++i;
+ } else
+ rqst->rq_rcv_buf.tail[0].iov_len = 0;
+
+ if (copy_len)
+ dprintk("RPC: %s: %d bytes in"
+ " %d extra segments (%d lost)\n",
+ __func__, olen, i, copy_len);
+
+ /* TBD avoid a warning from call_decode() */
+ rqst->rq_private_buf = rqst->rq_rcv_buf;
+}
+
+/*
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
+ */
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
+{
+ struct rpc_xprt *xprt = ep->rep_xprt;
+
+ spin_lock_bh(&xprt->transport_lock);
+ if (ep->rep_connected > 0) {
+ if (!xprt_test_and_set_connected(xprt))
+ xprt_wake_pending_tasks(xprt, 0);
+ } else {
+ if (xprt_test_and_clear_connected(xprt))
+ xprt_wake_pending_tasks(xprt, ep->rep_connected);
+ }
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
+/*
+ * This function is called when memory window unbind which we are waiting
+ * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ */
+static void
+rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+{
+ wake_up(&rep->rr_unbind);
+}
+
+/*
+ * Called as a tasklet to do req/reply match and complete a request
+ * Errors must result in the RPC task either being awakened, or
+ * allowed to timeout, to discover the errors at that time.
+ */
+void
+rpcrdma_reply_handler(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_msg *headerp;
+ struct rpcrdma_req *req;
+ struct rpc_rqst *rqst;
+ struct rpc_xprt *xprt = rep->rr_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ u32 *iptr;
+ int i, rdmalen, status;
+
+ /* Check status. If bad, signal disconnect and return rep to pool */
+ if (rep->rr_len == ~0U) {
+ rpcrdma_recv_buffer_put(rep);
+ if (r_xprt->rx_ep.rep_connected == 1) {
+ r_xprt->rx_ep.rep_connected = -EIO;
+ rpcrdma_conn_func(&r_xprt->rx_ep);
+ }
+ return;
+ }
+ if (rep->rr_len < 28) {
+ dprintk("RPC: %s: short/invalid reply\n", __func__);
+ goto repost;
+ }
+ headerp = (struct rpcrdma_msg *) rep->rr_base;
+ if (headerp->rm_vers != xdr_one) {
+ dprintk("RPC: %s: invalid version %d\n",
+ __func__, ntohl(headerp->rm_vers));
+ goto repost;
+ }
+
+ /* Get XID and try for a match. */
+ spin_lock(&xprt->transport_lock);
+ rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+ if (rqst == NULL) {
+ spin_unlock(&xprt->transport_lock);
+ dprintk("RPC: %s: reply 0x%p failed "
+ "to match any request xid 0x%08x len %d\n",
+ __func__, rep, headerp->rm_xid, rep->rr_len);
+repost:
+ r_xprt->rx_stats.bad_reply_count++;
+ rep->rr_func = rpcrdma_reply_handler;
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ rpcrdma_recv_buffer_put(rep);
+
+ return;
+ }
+
+ /* get request object */
+ req = rpcr_to_rdmar(rqst);
+
+ dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
+ " RPC request 0x%p xid 0x%08x\n",
+ __func__, rep, req, rqst, headerp->rm_xid);
+
+ BUG_ON(!req || req->rl_reply);
+
+ /* from here on, the reply is no longer an orphan */
+ req->rl_reply = rep;
+
+ /* check for expected message types */
+ /* The order of some of these tests is important. */
+ switch (headerp->rm_type) {
+ case __constant_htonl(RDMA_MSG):
+ /* never expect read chunks */
+ /* never expect reply chunks (two ways to check) */
+ /* never expect write chunks without having offered RDMA */
+ if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+ (headerp->rm_body.rm_chunks[1] == xdr_zero &&
+ headerp->rm_body.rm_chunks[2] != xdr_zero) ||
+ (headerp->rm_body.rm_chunks[1] != xdr_zero &&
+ req->rl_nchunks == 0))
+ goto badheader;
+ if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
+ /* count any expected write chunks in read reply */
+ /* start at write chunk array count */
+ iptr = &headerp->rm_body.rm_chunks[2];
+ rdmalen = rpcrdma_count_chunks(rep,
+ req->rl_nchunks, 1, &iptr);
+ /* check for validity, and no reply chunk after */
+ if (rdmalen < 0 || *iptr++ != xdr_zero)
+ goto badheader;
+ rep->rr_len -=
+ ((unsigned char *)iptr - (unsigned char *)headerp);
+ status = rep->rr_len + rdmalen;
+ r_xprt->rx_stats.total_rdma_reply += rdmalen;
+ } else {
+ /* else ordinary inline */
+ iptr = (u32 *)((unsigned char *)headerp + 28);
+ rep->rr_len -= 28; /*sizeof *headerp;*/
+ status = rep->rr_len;
+ }
+ /* Fix up the rpc results for upper layer */
+ rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+ break;
+
+ case __constant_htonl(RDMA_NOMSG):
+ /* never expect read or write chunks, always reply chunks */
+ if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+ headerp->rm_body.rm_chunks[1] != xdr_zero ||
+ headerp->rm_body.rm_chunks[2] != xdr_one ||
+ req->rl_nchunks == 0)
+ goto badheader;
+ iptr = (u32 *)((unsigned char *)headerp + 28);
+ rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
+ if (rdmalen < 0)
+ goto badheader;
+ r_xprt->rx_stats.total_rdma_reply += rdmalen;
+ /* Reply chunk buffer already is the reply vector - no fixup. */
+ status = rdmalen;
+ break;
+
+badheader:
+ default:
+ dprintk("%s: invalid rpcrdma reply header (type %d):"
+ " chunks[012] == %d %d %d"
+ " expected chunks <= %d\n",
+ __func__, ntohl(headerp->rm_type),
+ headerp->rm_body.rm_chunks[0],
+ headerp->rm_body.rm_chunks[1],
+ headerp->rm_body.rm_chunks[2],
+ req->rl_nchunks);
+ status = -EIO;
+ r_xprt->rx_stats.bad_reply_count++;
+ break;
+ }
+
+ /* If using mw bind, start the deregister process now. */
+ /* (Note: if mr_free(), cannot perform it here, in tasklet context) */
+ if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
+ case RPCRDMA_MEMWINDOWS:
+ for (i = 0; req->rl_nchunks-- > 1;)
+ i += rpcrdma_deregister_external(
+ &req->rl_segments[i], r_xprt, NULL);
+ /* Optionally wait (not here) for unbinds to complete */
+ rep->rr_func = rpcrdma_unbind_func;
+ (void) rpcrdma_deregister_external(&req->rl_segments[i],
+ r_xprt, rep);
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ for (i = 0; req->rl_nchunks--;)
+ i += rpcrdma_deregister_external(&req->rl_segments[i],
+ r_xprt, NULL);
+ break;
+ default:
+ break;
+ }
+
+ dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
+ __func__, xprt, rqst, status);
+ xprt_complete_rqst(rqst->rq_task, status);
+ spin_unlock(&xprt->transport_lock);
+}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
new file mode 100644
index 0000000..dc55cc97
--- /dev/null
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * transport.c
+ *
+ * This file contains the top-level implementation of an RPC RDMA
+ * transport.
+ *
+ * Naming convention: functions beginning with xprt_ are part of the
+ * transport switch. All others are RPC RDMA internal.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include "xprt_rdma.h"
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+MODULE_LICENSE("Dual BSD/GPL");
+
+MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
+MODULE_AUTHOR("Network Appliance, Inc.");
+
+/*
+ * tunables
+ */
+
+static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_inline_write_padding;
+#if !RPCRDMA_PERSISTENT_REGISTRATION
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
+#else
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
+#endif
+
+#ifdef RPC_DEBUG
+
+static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
+static unsigned int zero;
+static unsigned int max_padding = PAGE_SIZE;
+static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
+static unsigned int max_memreg = RPCRDMA_LAST - 1;
+
+static struct ctl_table_header *sunrpc_table_header;
+
+static ctl_table xr_tunables_table[] = {
+ {
+ .ctl_name = CTL_SLOTTABLE_RDMA,
+ .procname = "rdma_slot_table_entries",
+ .data = &xprt_rdma_slot_table_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_slot_table_size,
+ .extra2 = &max_slot_table_size
+ },
+ {
+ .ctl_name = CTL_RDMA_MAXINLINEREAD,
+ .procname = "rdma_max_inline_read",
+ .data = &xprt_rdma_max_inline_read,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = CTL_RDMA_MAXINLINEWRITE,
+ .procname = "rdma_max_inline_write",
+ .data = &xprt_rdma_max_inline_write,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = CTL_RDMA_WRITEPADDING,
+ .procname = "rdma_inline_write_padding",
+ .data = &xprt_rdma_inline_write_padding,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &max_padding,
+ },
+ {
+ .ctl_name = CTL_RDMA_MEMREG,
+ .procname = "rdma_memreg_strategy",
+ .data = &xprt_rdma_memreg_strategy,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_memreg,
+ .extra2 = &max_memreg,
+ },
+ {
+ .ctl_name = 0,
+ },
+};
+
+static ctl_table sunrpc_table[] = {
+ {
+ .ctl_name = CTL_SUNRPC,
+ .procname = "sunrpc",
+ .mode = 0555,
+ .child = xr_tunables_table
+ },
+ {
+ .ctl_name = 0,
+ },
+};
+
+#endif
+
+static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
+
+static void
+xprt_rdma_format_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)
+ &rpcx_to_rdmad(xprt).addr;
+ char *buf;
+
+ buf = kzalloc(20, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr));
+ xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 8, "%u", ntohs(addr->sin_port));
+ xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+ xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
+
+ buf = kzalloc(48, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port), "rdma");
+ xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+ buf = kzalloc(10, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 10, "%02x%02x%02x%02x",
+ NIPQUAD(addr->sin_addr.s_addr));
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 8, "%4hx", ntohs(addr->sin_port));
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+ buf = kzalloc(30, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port) >> 8,
+ ntohs(addr->sin_port) & 0xff);
+ xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+ /* netid */
+ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+}
+
+static void
+xprt_rdma_free_addresses(struct rpc_xprt *xprt)
+{
+ kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+ kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
+}
+
+static void
+xprt_rdma_connect_worker(struct work_struct *work)
+{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(work, struct rpcrdma_xprt, rdma_connect.work);
+ struct rpc_xprt *xprt = &r_xprt->xprt;
+ int rc = 0;
+
+ if (!xprt->shutdown) {
+ xprt_clear_connected(xprt);
+
+ dprintk("RPC: %s: %sconnect\n", __func__,
+ r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
+ rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ goto out;
+ }
+ goto out_clear;
+
+out:
+ xprt_wake_pending_tasks(xprt, rc);
+
+out_clear:
+ dprintk("RPC: %s: exit\n", __func__);
+ xprt_clear_connecting(xprt);
+}
+
+/*
+ * xprt_rdma_destroy
+ *
+ * Destroy the xprt.
+ * Free all memory associated with the object, including its own.
+ * NOTE: none of the *destroy methods free memory for their top-level
+ * objects, even though they may have allocated it (they do free
+ * private memory). It's up to the caller to handle it. In this
+ * case (RDMA transport), all structure memory is inlined with the
+ * struct rpcrdma_xprt.
+ */
+static void
+xprt_rdma_destroy(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int rc;
+
+ dprintk("RPC: %s: called\n", __func__);
+
+ cancel_delayed_work(&r_xprt->rdma_connect);
+ flush_scheduled_work();
+
+ xprt_clear_connected(xprt);
+
+ rpcrdma_buffer_destroy(&r_xprt->rx_buf);
+ rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
+ __func__, rc);
+ rpcrdma_ia_close(&r_xprt->rx_ia);
+
+ xprt_rdma_free_addresses(xprt);
+
+ kfree(xprt->slot);
+ xprt->slot = NULL;
+ kfree(xprt);
+
+ dprintk("RPC: %s: returning\n", __func__);
+
+ module_put(THIS_MODULE);
+}
+
+/**
+ * xprt_setup_rdma - Set up transport to use RDMA
+ *
+ * @args: rpc transport arguments
+ */
+static struct rpc_xprt *
+xprt_setup_rdma(struct xprt_create *args)
+{
+ struct rpcrdma_create_data_internal cdata;
+ struct rpc_xprt *xprt;
+ struct rpcrdma_xprt *new_xprt;
+ struct rpcrdma_ep *new_ep;
+ struct sockaddr_in *sin;
+ int rc;
+
+ if (args->addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: %s: address too large\n", __func__);
+ return ERR_PTR(-EBADF);
+ }
+
+ xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
+ if (xprt == NULL) {
+ dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xprt->max_reqs = xprt_rdma_slot_table_entries;
+ xprt->slot = kcalloc(xprt->max_reqs,
+ sizeof(struct rpc_rqst), GFP_KERNEL);
+ if (xprt->slot == NULL) {
+ kfree(xprt);
+ dprintk("RPC: %s: couldn't allocate %d slots\n",
+ __func__, xprt->max_reqs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* 60 second timeout, no retries */
+ xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
+ xprt->bind_timeout = (60U * HZ);
+ xprt->connect_timeout = (60U * HZ);
+ xprt->reestablish_timeout = (5U * HZ);
+ xprt->idle_timeout = (5U * 60 * HZ);
+
+ xprt->resvport = 0; /* privileged port not needed */
+ xprt->tsh_size = 0; /* RPC-RDMA handles framing */
+ xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
+ xprt->ops = &xprt_rdma_procs;
+
+ /*
+ * Set up RDMA-specific connect data.
+ */
+
+ /* Put server RDMA address in local cdata */
+ memcpy(&cdata.addr, args->dstaddr, args->addrlen);
+
+ /* Ensure xprt->addr holds valid server TCP (not RDMA)
+ * address, for any side protocols which peek at it */
+ xprt->prot = IPPROTO_TCP;
+ xprt->addrlen = args->addrlen;
+ memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
+
+ sin = (struct sockaddr_in *)&cdata.addr;
+ if (ntohs(sin->sin_port) != 0)
+ xprt_set_bound(xprt);
+
+ dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__,
+ NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
+
+ /* Set max requests */
+ cdata.max_requests = xprt->max_reqs;
+
+ /* Set some length limits */
+ cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
+ cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
+
+ cdata.inline_wsize = xprt_rdma_max_inline_write;
+ if (cdata.inline_wsize > cdata.wsize)
+ cdata.inline_wsize = cdata.wsize;
+
+ cdata.inline_rsize = xprt_rdma_max_inline_read;
+ if (cdata.inline_rsize > cdata.rsize)
+ cdata.inline_rsize = cdata.rsize;
+
+ cdata.padding = xprt_rdma_inline_write_padding;
+
+ /*
+ * Create new transport instance, which includes initialized
+ * o ia
+ * o endpoint
+ * o buffers
+ */
+
+ new_xprt = rpcx_to_rdmax(xprt);
+
+ rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
+ xprt_rdma_memreg_strategy);
+ if (rc)
+ goto out1;
+
+ /*
+ * initialize and create ep
+ */
+ new_xprt->rx_data = cdata;
+ new_ep = &new_xprt->rx_ep;
+ new_ep->rep_remote_addr = cdata.addr;
+
+ rc = rpcrdma_ep_create(&new_xprt->rx_ep,
+ &new_xprt->rx_ia, &new_xprt->rx_data);
+ if (rc)
+ goto out2;
+
+ /*
+ * Allocate pre-registered send and receive buffers for headers and
+ * any inline data. Also specify any padding which will be provided
+ * from a preregistered zero buffer.
+ */
+ rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
+ &new_xprt->rx_data);
+ if (rc)
+ goto out3;
+
+ /*
+ * Register a callback for connection events. This is necessary because
+ * connection loss notification is async. We also catch connection loss
+ * when reaping receives.
+ */
+ INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
+ new_ep->rep_func = rpcrdma_conn_func;
+ new_ep->rep_xprt = xprt;
+
+ xprt_rdma_format_addresses(xprt);
+
+ if (!try_module_get(THIS_MODULE))
+ goto out4;
+
+ return xprt;
+
+out4:
+ xprt_rdma_free_addresses(xprt);
+ rc = -EINVAL;
+out3:
+ (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+out2:
+ rpcrdma_ia_close(&new_xprt->rx_ia);
+out1:
+ kfree(xprt->slot);
+ kfree(xprt);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Close a connection, during shutdown or timeout/reconnect
+ */
+static void
+xprt_rdma_close(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ dprintk("RPC: %s: closing\n", __func__);
+ xprt_disconnect(xprt);
+ (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+}
+
+static void
+xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
+{
+ struct sockaddr_in *sap;
+
+ sap = (struct sockaddr_in *)&xprt->addr;
+ sap->sin_port = htons(port);
+ sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
+ sap->sin_port = htons(port);
+ dprintk("RPC: %s: %u\n", __func__, port);
+}
+
+static void
+xprt_rdma_connect(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ if (!xprt_test_and_set_connecting(xprt)) {
+ if (r_xprt->rx_ep.rep_connected != 0) {
+ /* Reconnect */
+ schedule_delayed_work(&r_xprt->rdma_connect,
+ xprt->reestablish_timeout);
+ } else {
+ schedule_delayed_work(&r_xprt->rdma_connect, 0);
+ if (!RPC_IS_ASYNC(task))
+ flush_scheduled_work();
+ }
+ }
+}
+
+static int
+xprt_rdma_reserve_xprt(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
+
+ /* == RPC_CWNDSCALE @ init, but *after* setup */
+ if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
+ r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
+ dprintk("RPC: %s: cwndscale %lu\n", __func__,
+ r_xprt->rx_buf.rb_cwndscale);
+ BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
+ }
+ xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
+ return xprt_reserve_xprt_cong(task);
+}
+
+/*
+ * The RDMA allocate/free functions need the task structure as a place
+ * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
+ * sequence. For this reason, the recv buffers are attached to send
+ * buffers for portions of the RPC. Note that the RPC layer allocates
+ * both send and receive buffers in the same call. We may register
+ * the receive buffer portion when using reply chunks.
+ */
+static void *
+xprt_rdma_allocate(struct rpc_task *task, size_t size)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpcrdma_req *req, *nreq;
+
+ req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
+ BUG_ON(NULL == req);
+
+ if (size > req->rl_size) {
+ dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
+ "prog %d vers %d proc %d\n",
+ __func__, size, req->rl_size,
+ task->tk_client->cl_prog, task->tk_client->cl_vers,
+ task->tk_msg.rpc_proc->p_proc);
+ /*
+ * Outgoing length shortage. Our inline write max must have
+ * been configured to perform direct i/o.
+ *
+ * This is therefore a large metadata operation, and the
+ * allocate call was made on the maximum possible message,
+ * e.g. containing long filename(s) or symlink data. In
+ * fact, while these metadata operations *might* carry
+ * large outgoing payloads, they rarely *do*. However, we
+ * have to commit to the request here, so reallocate and
+ * register it now. The data path will never require this
+ * reallocation.
+ *
+ * If the allocation or registration fails, the RPC framework
+ * will (doggedly) retry.
+ */
+ if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
+ RPCRDMA_BOUNCEBUFFERS) {
+ /* forced to "pure inline" */
+ dprintk("RPC: %s: too much data (%zd) for inline "
+ "(r/w max %d/%d)\n", __func__, size,
+ rpcx_to_rdmad(xprt).inline_rsize,
+ rpcx_to_rdmad(xprt).inline_wsize);
+ size = req->rl_size;
+ rpc_exit(task, -EIO); /* fail the operation */
+ rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+ goto out;
+ }
+ if (task->tk_flags & RPC_TASK_SWAPPER)
+ nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
+ else
+ nreq = kmalloc(sizeof *req + size, GFP_NOFS);
+ if (nreq == NULL)
+ goto outfail;
+
+ if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
+ nreq->rl_base, size + sizeof(struct rpcrdma_req)
+ - offsetof(struct rpcrdma_req, rl_base),
+ &nreq->rl_handle, &nreq->rl_iov)) {
+ kfree(nreq);
+ goto outfail;
+ }
+ rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
+ nreq->rl_size = size;
+ nreq->rl_niovs = 0;
+ nreq->rl_nchunks = 0;
+ nreq->rl_buffer = (struct rpcrdma_buffer *)req;
+ nreq->rl_reply = req->rl_reply;
+ memcpy(nreq->rl_segments,
+ req->rl_segments, sizeof nreq->rl_segments);
+ /* flag the swap with an unused field */
+ nreq->rl_iov.length = 0;
+ req->rl_reply = NULL;
+ req = nreq;
+ }
+ dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
+out:
+ return req->rl_xdr_buf;
+
+outfail:
+ rpcrdma_buffer_put(req);
+ rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+ return NULL;
+}
+
+/*
+ * This function returns all RDMA resources to the pool.
+ */
+static void
+xprt_rdma_free(void *buffer)
+{
+ struct rpcrdma_req *req;
+ struct rpcrdma_xprt *r_xprt;
+ struct rpcrdma_rep *rep;
+ int i;
+
+ if (buffer == NULL)
+ return;
+
+ req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
+ r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
+ rep = req->rl_reply;
+
+ dprintk("RPC: %s: called on 0x%p%s\n",
+ __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
+
+ /*
+ * Finish the deregistration. When using mw bind, this was
+ * begun in rpcrdma_reply_handler(). In all other modes, we
+ * do it here, in thread context. The process is considered
+ * complete when the rr_func vector becomes NULL - this
+ * was put in place during rpcrdma_reply_handler() - the wait
+ * call below will not block if the dereg is "done". If
+ * interrupted, our framework will clean up.
+ */
+ for (i = 0; req->rl_nchunks;) {
+ --req->rl_nchunks;
+ i += rpcrdma_deregister_external(
+ &req->rl_segments[i], r_xprt, NULL);
+ }
+
+ if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
+ rep->rr_func = NULL; /* abandon the callback */
+ req->rl_reply = NULL;
+ }
+
+ if (req->rl_iov.length == 0) { /* see allocate above */
+ struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
+ oreq->rl_reply = req->rl_reply;
+ (void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
+ req->rl_handle,
+ &req->rl_iov);
+ kfree(req);
+ req = oreq;
+ }
+
+ /* Put back request+reply buffers */
+ rpcrdma_buffer_put(req);
+}
+
+/*
+ * send_request invokes the meat of RPC RDMA. It must do the following:
+ * 1. Marshal the RPC request into an RPC RDMA request, which means
+ * putting a header in front of data, and creating IOVs for RDMA
+ * from those in the request.
+ * 2. In marshaling, detect opportunities for RDMA, and use them.
+ * 3. Post a recv message to set up asynch completion, then send
+ * the request (rpcrdma_ep_post).
+ * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ */
+
+static int
+xprt_rdma_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ /* marshal the send itself */
+ if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
+ r_xprt->rx_stats.failed_marshal_count++;
+ dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
+ __func__);
+ return -EIO;
+ }
+
+ if (req->rl_reply == NULL) /* e.g. reconnection */
+ rpcrdma_recv_buffer_get(req);
+
+ if (req->rl_reply) {
+ req->rl_reply->rr_func = rpcrdma_reply_handler;
+ /* this need only be done once, but... */
+ req->rl_reply->rr_xprt = xprt;
+ }
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
+ xprt_disconnect(xprt);
+ return -ENOTCONN; /* implies disconnect */
+ }
+
+ rqst->rq_bytes_sent = 0;
+ return 0;
+}
+
+static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ long idle_time = 0;
+
+ if (xprt_connected(xprt))
+ idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+ seq_printf(seq,
+ "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
+ "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
+
+ 0, /* need a local port? */
+ xprt->stat.bind_count,
+ xprt->stat.connect_count,
+ xprt->stat.connect_time,
+ idle_time,
+ xprt->stat.sends,
+ xprt->stat.recvs,
+ xprt->stat.bad_xids,
+ xprt->stat.req_u,
+ xprt->stat.bklog_u,
+
+ r_xprt->rx_stats.read_chunk_count,
+ r_xprt->rx_stats.write_chunk_count,
+ r_xprt->rx_stats.reply_chunk_count,
+ r_xprt->rx_stats.total_rdma_request,
+ r_xprt->rx_stats.total_rdma_reply,
+ r_xprt->rx_stats.pullup_copy_count,
+ r_xprt->rx_stats.fixup_copy_count,
+ r_xprt->rx_stats.hardway_register_count,
+ r_xprt->rx_stats.failed_marshal_count,
+ r_xprt->rx_stats.bad_reply_count);
+}
+
+/*
+ * Plumbing for rpc transport switch and kernel module
+ */
+
+static struct rpc_xprt_ops xprt_rdma_procs = {
+ .reserve_xprt = xprt_rdma_reserve_xprt,
+ .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
+ .release_request = xprt_release_rqst_cong, /* ditto */
+ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
+ .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
+ .set_port = xprt_rdma_set_port,
+ .connect = xprt_rdma_connect,
+ .buf_alloc = xprt_rdma_allocate,
+ .buf_free = xprt_rdma_free,
+ .send_request = xprt_rdma_send_request,
+ .close = xprt_rdma_close,
+ .destroy = xprt_rdma_destroy,
+ .print_stats = xprt_rdma_print_stats
+};
+
+static struct xprt_class xprt_rdma = {
+ .list = LIST_HEAD_INIT(xprt_rdma.list),
+ .name = "rdma",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_RDMA,
+ .setup = xprt_setup_rdma,
+};
+
+static void __exit xprt_rdma_cleanup(void)
+{
+ int rc;
+
+ dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+#ifdef RPC_DEBUG
+ if (sunrpc_table_header) {
+ unregister_sysctl_table(sunrpc_table_header);
+ sunrpc_table_header = NULL;
+ }
+#endif
+ rc = xprt_unregister_transport(&xprt_rdma);
+ if (rc)
+ dprintk("RPC: %s: xprt_unregister returned %i\n",
+ __func__, rc);
+}
+
+static int __init xprt_rdma_init(void)
+{
+ int rc;
+
+ rc = xprt_register_transport(&xprt_rdma);
+
+ if (rc)
+ return rc;
+
+ dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+
+ dprintk(KERN_INFO "Defaults:\n");
+ dprintk(KERN_INFO "\tSlots %d\n"
+ "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
+ xprt_rdma_slot_table_entries,
+ xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
+ dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+ xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+
+#ifdef RPC_DEBUG
+ if (!sunrpc_table_header)
+ sunrpc_table_header = register_sysctl_table(sunrpc_table);
+#endif
+ return 0;
+}
+
+module_init(xprt_rdma_init);
+module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
new file mode 100644
index 0000000..9ec8ca4
--- /dev/null
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -0,0 +1,1626 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * verbs.c
+ *
+ * Encapsulates the major functions managing:
+ * o adapters
+ * o endpoints
+ * o connections
+ * o buffer memory
+ */
+
+#include <linux/pci.h> /* for Tavor hack below */
+
+#include "xprt_rdma.h"
+
+/*
+ * Globals/Macros
+ */
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+/*
+ * internal functions
+ */
+
+/*
+ * handle replies in tasklet context, using a single, global list
+ * rdma tasklet function -- just turn around and call the func
+ * for all replies on the list
+ */
+
+static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
+static LIST_HEAD(rpcrdma_tasklets_g);
+
+static void
+rpcrdma_run_tasklet(unsigned long data)
+{
+ struct rpcrdma_rep *rep;
+ void (*func)(struct rpcrdma_rep *);
+ unsigned long flags;
+
+ data = data;
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ while (!list_empty(&rpcrdma_tasklets_g)) {
+ rep = list_entry(rpcrdma_tasklets_g.next,
+ struct rpcrdma_rep, rr_list);
+ list_del(&rep->rr_list);
+ func = rep->rr_func;
+ rep->rr_func = NULL;
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+
+ if (func)
+ func(rep);
+ else
+ rpcrdma_recv_buffer_put(rep);
+
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ }
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+}
+
+static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
+
+static inline void
+rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+ tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
+static void
+rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+{
+ struct rpcrdma_ep *ep = context;
+
+ dprintk("RPC: %s: QP error %X on device %s ep %p\n",
+ __func__, event->event, event->device->name, context);
+ if (ep->rep_connected == 1) {
+ ep->rep_connected = -EIO;
+ ep->rep_func(ep);
+ wake_up_all(&ep->rep_connect_wait);
+ }
+}
+
+static void
+rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
+{
+ struct rpcrdma_ep *ep = context;
+
+ dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
+ __func__, event->event, event->device->name, context);
+ if (ep->rep_connected == 1) {
+ ep->rep_connected = -EIO;
+ ep->rep_func(ep);
+ wake_up_all(&ep->rep_connect_wait);
+ }
+}
+
+static inline
+void rpcrdma_event_process(struct ib_wc *wc)
+{
+ struct rpcrdma_rep *rep =
+ (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+
+ dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
+ __func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+ if (!rep) /* send or bind completion that we don't care about */
+ return;
+
+ if (IB_WC_SUCCESS != wc->status) {
+ dprintk("RPC: %s: %s WC status %X, connection lost\n",
+ __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
+ wc->status);
+ rep->rr_len = ~0U;
+ rpcrdma_schedule_tasklet(rep);
+ return;
+ }
+
+ switch (wc->opcode) {
+ case IB_WC_RECV:
+ rep->rr_len = wc->byte_len;
+ ib_dma_sync_single_for_cpu(
+ rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+ rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+ /* Keep (only) the most recent credits, after check validity */
+ if (rep->rr_len >= 16) {
+ struct rpcrdma_msg *p =
+ (struct rpcrdma_msg *) rep->rr_base;
+ unsigned int credits = ntohl(p->rm_credit);
+ if (credits == 0) {
+ dprintk("RPC: %s: server"
+ " dropped credits to 0!\n", __func__);
+ /* don't deadlock */
+ credits = 1;
+ } else if (credits > rep->rr_buffer->rb_max_requests) {
+ dprintk("RPC: %s: server"
+ " over-crediting: %d (%d)\n",
+ __func__, credits,
+ rep->rr_buffer->rb_max_requests);
+ credits = rep->rr_buffer->rb_max_requests;
+ }
+ atomic_set(&rep->rr_buffer->rb_credits, credits);
+ }
+ /* fall through */
+ case IB_WC_BIND_MW:
+ rpcrdma_schedule_tasklet(rep);
+ break;
+ default:
+ dprintk("RPC: %s: unexpected WC event %X\n",
+ __func__, wc->opcode);
+ break;
+ }
+}
+
+static inline int
+rpcrdma_cq_poll(struct ib_cq *cq)
+{
+ struct ib_wc wc;
+ int rc;
+
+ for (;;) {
+ rc = ib_poll_cq(cq, 1, &wc);
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_poll_cq failed %i\n",
+ __func__, rc);
+ return rc;
+ }
+ if (rc == 0)
+ break;
+
+ rpcrdma_event_process(&wc);
+ }
+
+ return 0;
+}
+
+/*
+ * rpcrdma_cq_event_upcall
+ *
+ * This upcall handles recv, send, bind and unbind events.
+ * It is reentrant but processes single events in order to maintain
+ * ordering of receives to keep server credits.
+ *
+ * It is the responsibility of the scheduled tasklet to return
+ * recv buffers to the pool. NOTE: this affects synchronization of
+ * connection shutdown. That is, the structures required for
+ * the completion of the reply handler must remain intact until
+ * all memory has been reclaimed.
+ *
+ * Note that send events are suppressed and do not result in an upcall.
+ */
+static void
+rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+{
+ int rc;
+
+ rc = rpcrdma_cq_poll(cq);
+ if (rc)
+ return;
+
+ rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
+ __func__, rc);
+ return;
+ }
+
+ rpcrdma_cq_poll(cq);
+}
+
+#ifdef RPC_DEBUG
+static const char * const conn[] = {
+ "address resolved",
+ "address error",
+ "route resolved",
+ "route error",
+ "connect request",
+ "connect response",
+ "connect error",
+ "unreachable",
+ "rejected",
+ "established",
+ "disconnected",
+ "device removal"
+};
+#endif
+
+static int
+rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+ struct rpcrdma_xprt *xprt = id->context;
+ struct rpcrdma_ia *ia = &xprt->rx_ia;
+ struct rpcrdma_ep *ep = &xprt->rx_ep;
+ struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+ struct ib_qp_attr attr;
+ struct ib_qp_init_attr iattr;
+ int connstate = 0;
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ complete(&ia->ri_done);
+ break;
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ ia->ri_async_rc = -EHOSTUNREACH;
+ dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
+ __func__, ep);
+ complete(&ia->ri_done);
+ break;
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ ia->ri_async_rc = -ENETUNREACH;
+ dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
+ __func__, ep);
+ complete(&ia->ri_done);
+ break;
+ case RDMA_CM_EVENT_ESTABLISHED:
+ connstate = 1;
+ ib_query_qp(ia->ri_id->qp, &attr,
+ IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
+ &iattr);
+ dprintk("RPC: %s: %d responder resources"
+ " (%d initiator)\n",
+ __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
+ goto connected;
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ connstate = -ENOTCONN;
+ goto connected;
+ case RDMA_CM_EVENT_UNREACHABLE:
+ connstate = -ENETDOWN;
+ goto connected;
+ case RDMA_CM_EVENT_REJECTED:
+ connstate = -ECONNREFUSED;
+ goto connected;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ connstate = -ECONNABORTED;
+ goto connected;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ connstate = -ENODEV;
+connected:
+ dprintk("RPC: %s: %s: %u.%u.%u.%u:%u"
+ " (ep 0x%p event 0x%x)\n",
+ __func__,
+ (event->event <= 11) ? conn[event->event] :
+ "unknown connection error",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port),
+ ep, event->event);
+ atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
+ dprintk("RPC: %s: %sconnected\n",
+ __func__, connstate > 0 ? "" : "dis");
+ ep->rep_connected = connstate;
+ ep->rep_func(ep);
+ wake_up_all(&ep->rep_connect_wait);
+ break;
+ default:
+ ia->ri_async_rc = -EINVAL;
+ dprintk("RPC: %s: unexpected CM event %X\n",
+ __func__, event->event);
+ complete(&ia->ri_done);
+ break;
+ }
+
+ return 0;
+}
+
+static struct rdma_cm_id *
+rpcrdma_create_id(struct rpcrdma_xprt *xprt,
+ struct rpcrdma_ia *ia, struct sockaddr *addr)
+{
+ struct rdma_cm_id *id;
+ int rc;
+
+ id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
+ if (IS_ERR(id)) {
+ rc = PTR_ERR(id);
+ dprintk("RPC: %s: rdma_create_id() failed %i\n",
+ __func__, rc);
+ return id;
+ }
+
+ ia->ri_async_rc = 0;
+ rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+ if (rc) {
+ dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
+ __func__, rc);
+ goto out;
+ }
+ wait_for_completion(&ia->ri_done);
+ rc = ia->ri_async_rc;
+ if (rc)
+ goto out;
+
+ ia->ri_async_rc = 0;
+ rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
+ if (rc) {
+ dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
+ __func__, rc);
+ goto out;
+ }
+ wait_for_completion(&ia->ri_done);
+ rc = ia->ri_async_rc;
+ if (rc)
+ goto out;
+
+ return id;
+
+out:
+ rdma_destroy_id(id);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Drain any cq, prior to teardown.
+ */
+static void
+rpcrdma_clean_cq(struct ib_cq *cq)
+{
+ struct ib_wc wc;
+ int count = 0;
+
+ while (1 == ib_poll_cq(cq, 1, &wc))
+ ++count;
+
+ if (count)
+ dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
+ __func__, count, wc.opcode);
+}
+
+/*
+ * Exported functions.
+ */
+
+/*
+ * Open and initialize an Interface Adapter.
+ * o initializes fields of struct rpcrdma_ia, including
+ * interface and provider attributes and protection zone.
+ */
+int
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+{
+ int rc;
+ struct rpcrdma_ia *ia = &xprt->rx_ia;
+
+ init_completion(&ia->ri_done);
+
+ ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+ if (IS_ERR(ia->ri_id)) {
+ rc = PTR_ERR(ia->ri_id);
+ goto out1;
+ }
+
+ ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+ if (IS_ERR(ia->ri_pd)) {
+ rc = PTR_ERR(ia->ri_pd);
+ dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ /*
+ * Optionally obtain an underlying physical identity mapping in
+ * order to do a memory window-based bind. This base registration
+ * is protected from remote access - that is enabled only by binding
+ * for the specific bytes targeted during each RPC operation, and
+ * revoked after the corresponding completion similar to a storage
+ * adapter.
+ */
+ if (memreg > RPCRDMA_REGISTER) {
+ int mem_priv = IB_ACCESS_LOCAL_WRITE;
+ switch (memreg) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+ mem_priv |= IB_ACCESS_REMOTE_WRITE;
+ mem_priv |= IB_ACCESS_REMOTE_READ;
+ break;
+#endif
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ mem_priv |= IB_ACCESS_MW_BIND;
+ break;
+ default:
+ break;
+ }
+ ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
+ if (IS_ERR(ia->ri_bind_mem)) {
+ printk(KERN_ALERT "%s: ib_get_dma_mr for "
+ "phys register failed with %lX\n\t"
+ "Will continue with degraded performance\n",
+ __func__, PTR_ERR(ia->ri_bind_mem));
+ memreg = RPCRDMA_REGISTER;
+ ia->ri_bind_mem = NULL;
+ }
+ }
+
+ /* Else will do memory reg/dereg for each chunk */
+ ia->ri_memreg_strategy = memreg;
+
+ return 0;
+out2:
+ rdma_destroy_id(ia->ri_id);
+out1:
+ return rc;
+}
+
+/*
+ * Clean up/close an IA.
+ * o if event handles and PD have been initialized, free them.
+ * o close the IA
+ */
+void
+rpcrdma_ia_close(struct rpcrdma_ia *ia)
+{
+ int rc;
+
+ dprintk("RPC: %s: entering\n", __func__);
+ if (ia->ri_bind_mem != NULL) {
+ rc = ib_dereg_mr(ia->ri_bind_mem);
+ dprintk("RPC: %s: ib_dereg_mr returned %i\n",
+ __func__, rc);
+ }
+ if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
+ rdma_destroy_qp(ia->ri_id);
+ if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
+ rc = ib_dealloc_pd(ia->ri_pd);
+ dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
+ __func__, rc);
+ }
+ if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
+ rdma_destroy_id(ia->ri_id);
+}
+
+/*
+ * Create unconnected endpoint.
+ */
+int
+rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
+ struct rpcrdma_create_data_internal *cdata)
+{
+ struct ib_device_attr devattr;
+ int rc;
+
+ rc = ib_query_device(ia->ri_id->device, &devattr);
+ if (rc) {
+ dprintk("RPC: %s: ib_query_device failed %d\n",
+ __func__, rc);
+ return rc;
+ }
+
+ /* check provider's send/recv wr limits */
+ if (cdata->max_requests > devattr.max_qp_wr)
+ cdata->max_requests = devattr.max_qp_wr;
+
+ ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+ ep->rep_attr.qp_context = ep;
+ /* send_cq and recv_cq initialized below */
+ ep->rep_attr.srq = NULL;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ /* Add room for mw_binds+unbinds - overkill! */
+ ep->rep_attr.cap.max_send_wr++;
+ ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+ return -EINVAL;
+ break;
+ default:
+ break;
+ }
+ ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
+ ep->rep_attr.cap.max_recv_sge = 1;
+ ep->rep_attr.cap.max_inline_data = 0;
+ ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ ep->rep_attr.qp_type = IB_QPT_RC;
+ ep->rep_attr.port_num = ~0;
+
+ dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
+ "iovs: send %d recv %d\n",
+ __func__,
+ ep->rep_attr.cap.max_send_wr,
+ ep->rep_attr.cap.max_recv_wr,
+ ep->rep_attr.cap.max_send_sge,
+ ep->rep_attr.cap.max_recv_sge);
+
+ /* set trigger for requesting send completion */
+ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
+ break;
+ default:
+ break;
+ }
+ if (ep->rep_cqinit <= 2)
+ ep->rep_cqinit = 0;
+ INIT_CQCOUNT(ep);
+ ep->rep_ia = ia;
+ init_waitqueue_head(&ep->rep_connect_wait);
+
+ /*
+ * Create a single cq for receive dto and mw_bind (only ever
+ * care about unbind, really). Send completions are suppressed.
+ * Use single threaded tasklet upcalls to maintain ordering.
+ */
+ ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
+ rpcrdma_cq_async_error_upcall, NULL,
+ ep->rep_attr.cap.max_recv_wr +
+ ep->rep_attr.cap.max_send_wr + 1, 0);
+ if (IS_ERR(ep->rep_cq)) {
+ rc = PTR_ERR(ep->rep_cq);
+ dprintk("RPC: %s: ib_create_cq failed: %i\n",
+ __func__, rc);
+ goto out1;
+ }
+
+ rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ ep->rep_attr.send_cq = ep->rep_cq;
+ ep->rep_attr.recv_cq = ep->rep_cq;
+
+ /* Initialize cma parameters */
+
+ /* RPC/RDMA does not use private data */
+ ep->rep_remote_cma.private_data = NULL;
+ ep->rep_remote_cma.private_data_len = 0;
+
+ /* Client offers RDMA Read but does not initiate */
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_BOUNCEBUFFERS:
+ ep->rep_remote_cma.responder_resources = 0;
+ break;
+ case RPCRDMA_MTHCAFMR:
+ case RPCRDMA_REGISTER:
+ ep->rep_remote_cma.responder_resources = cdata->max_requests *
+ (RPCRDMA_MAX_DATA_SEGS / 8);
+ break;
+ case RPCRDMA_MEMWINDOWS:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+#endif
+ ep->rep_remote_cma.responder_resources = cdata->max_requests *
+ (RPCRDMA_MAX_DATA_SEGS / 2);
+ break;
+ default:
+ break;
+ }
+ if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+ ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
+ ep->rep_remote_cma.initiator_depth = 0;
+
+ ep->rep_remote_cma.retry_count = 7;
+ ep->rep_remote_cma.flow_control = 0;
+ ep->rep_remote_cma.rnr_retry_count = 0;
+
+ return 0;
+
+out2:
+ if (ib_destroy_cq(ep->rep_cq))
+ ;
+out1:
+ return rc;
+}
+
+/*
+ * rpcrdma_ep_destroy
+ *
+ * Disconnect and destroy endpoint. After this, the only
+ * valid operations on the ep are to free it (if dynamically
+ * allocated) or re-create it.
+ *
+ * The caller's error handling must be sure to not leak the endpoint
+ * if this function fails.
+ */
+int
+rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ int rc;
+
+ dprintk("RPC: %s: entering, connected is %d\n",
+ __func__, ep->rep_connected);
+
+ if (ia->ri_id->qp) {
+ rc = rpcrdma_ep_disconnect(ep, ia);
+ if (rc)
+ dprintk("RPC: %s: rpcrdma_ep_disconnect"
+ " returned %i\n", __func__, rc);
+ }
+
+ ep->rep_func = NULL;
+
+ /* padding - could be done in rpcrdma_buffer_destroy... */
+ if (ep->rep_pad_mr) {
+ rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
+ ep->rep_pad_mr = NULL;
+ }
+
+ if (ia->ri_id->qp) {
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
+ }
+
+ rpcrdma_clean_cq(ep->rep_cq);
+ rc = ib_destroy_cq(ep->rep_cq);
+ if (rc)
+ dprintk("RPC: %s: ib_destroy_cq returned %i\n",
+ __func__, rc);
+
+ return rc;
+}
+
+/*
+ * Connect unconnected endpoint.
+ */
+int
+rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ struct rdma_cm_id *id;
+ int rc = 0;
+ int retry_count = 0;
+ int reconnect = (ep->rep_connected != 0);
+
+ if (reconnect) {
+ struct rpcrdma_xprt *xprt;
+retry:
+ rc = rpcrdma_ep_disconnect(ep, ia);
+ if (rc && rc != -ENOTCONN)
+ dprintk("RPC: %s: rpcrdma_ep_disconnect"
+ " status %i\n", __func__, rc);
+ rpcrdma_clean_cq(ep->rep_cq);
+
+ xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+ id = rpcrdma_create_id(xprt, ia,
+ (struct sockaddr *)&xprt->rx_data.addr);
+ if (IS_ERR(id)) {
+ rc = PTR_ERR(id);
+ goto out;
+ }
+ /* TEMP TEMP TEMP - fail if new device:
+ * Deregister/remarshal *all* requests!
+ * Close and recreate adapter, pd, etc!
+ * Re-determine all attributes still sane!
+ * More stuff I haven't thought of!
+ * Rrrgh!
+ */
+ if (ia->ri_id->device != id->device) {
+ printk("RPC: %s: can't reconnect on "
+ "different device!\n", __func__);
+ rdma_destroy_id(id);
+ rc = -ENETDOWN;
+ goto out;
+ }
+ /* END TEMP */
+ rdma_destroy_id(ia->ri_id);
+ ia->ri_id = id;
+ }
+
+ rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ goto out;
+ }
+
+/* XXX Tavor device performs badly with 2K MTU! */
+if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
+ struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
+ if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
+ (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
+ pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
+ struct ib_qp_attr attr = {
+ .path_mtu = IB_MTU_1024
+ };
+ rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
+ }
+}
+
+ /* Theoretically a client initiator_depth > 0 is not needed,
+ * but many peers fail to complete the connection unless they
+ * == responder_resources! */
+ if (ep->rep_remote_cma.initiator_depth !=
+ ep->rep_remote_cma.responder_resources)
+ ep->rep_remote_cma.initiator_depth =
+ ep->rep_remote_cma.responder_resources;
+
+ ep->rep_connected = 0;
+
+ rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
+ if (rc) {
+ dprintk("RPC: %s: rdma_connect() failed with %i\n",
+ __func__, rc);
+ goto out;
+ }
+
+ if (reconnect)
+ return 0;
+
+ wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
+
+ /*
+ * Check state. A non-peer reject indicates no listener
+ * (ECONNREFUSED), which may be a transient state. All
+ * others indicate a transport condition which has already
+ * undergone a best-effort.
+ */
+ if (ep->rep_connected == -ECONNREFUSED
+ && ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
+ dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
+ goto retry;
+ }
+ if (ep->rep_connected <= 0) {
+ /* Sometimes, the only way to reliably connect to remote
+ * CMs is to use same nonzero values for ORD and IRD. */
+ ep->rep_remote_cma.initiator_depth =
+ ep->rep_remote_cma.responder_resources;
+ if (ep->rep_remote_cma.initiator_depth == 0)
+ ++ep->rep_remote_cma.initiator_depth;
+ if (ep->rep_remote_cma.responder_resources == 0)
+ ++ep->rep_remote_cma.responder_resources;
+ if (retry_count++ == 0)
+ goto retry;
+ rc = ep->rep_connected;
+ } else {
+ dprintk("RPC: %s: connected\n", __func__);
+ }
+
+out:
+ if (rc)
+ ep->rep_connected = rc;
+ return rc;
+}
+
+/*
+ * rpcrdma_ep_disconnect
+ *
+ * This is separate from destroy to facilitate the ability
+ * to reconnect without recreating the endpoint.
+ *
+ * This call is not reentrant, and must not be made in parallel
+ * on the same endpoint.
+ */
+int
+rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ int rc;
+
+ rpcrdma_clean_cq(ep->rep_cq);
+ rc = rdma_disconnect(ia->ri_id);
+ if (!rc) {
+ /* returns without wait if not connected */
+ wait_event_interruptible(ep->rep_connect_wait,
+ ep->rep_connected != 1);
+ dprintk("RPC: %s: after wait, %sconnected\n", __func__,
+ (ep->rep_connected == 1) ? "still " : "dis");
+ } else {
+ dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
+ ep->rep_connected = rc;
+ }
+ return rc;
+}
+
+/*
+ * Initialize buffer memory
+ */
+int
+rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
+ struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
+{
+ char *p;
+ size_t len;
+ int i, rc;
+
+ buf->rb_max_requests = cdata->max_requests;
+ spin_lock_init(&buf->rb_lock);
+ atomic_set(&buf->rb_credits, 1);
+
+ /* Need to allocate:
+ * 1. arrays for send and recv pointers
+ * 2. arrays of struct rpcrdma_req to fill in pointers
+ * 3. array of struct rpcrdma_rep for replies
+ * 4. padding, if any
+ * 5. mw's, if any
+ * Send/recv buffers in req/rep need to be registered
+ */
+
+ len = buf->rb_max_requests *
+ (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
+ len += cdata->padding;
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ /* TBD we are perhaps overallocating here */
+ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_mw);
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_mw);
+ break;
+ default:
+ break;
+ }
+
+ /* allocate 1, 4 and 5 in one shot */
+ p = kzalloc(len, GFP_KERNEL);
+ if (p == NULL) {
+ dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
+ __func__, len);
+ rc = -ENOMEM;
+ goto out;
+ }
+ buf->rb_pool = p; /* for freeing it later */
+
+ buf->rb_send_bufs = (struct rpcrdma_req **) p;
+ p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
+ buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
+ p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
+
+ /*
+ * Register the zeroed pad buffer, if any.
+ */
+ if (cdata->padding) {
+ rc = rpcrdma_register_internal(ia, p, cdata->padding,
+ &ep->rep_pad_mr, &ep->rep_pad);
+ if (rc)
+ goto out;
+ }
+ p += cdata->padding;
+
+ /*
+ * Allocate the fmr's, or mw's for mw_bind chunk registration.
+ * We "cycle" the mw's in order to minimize rkey reuse,
+ * and also reduce unbind-to-bind collision.
+ */
+ INIT_LIST_HEAD(&buf->rb_mws);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ {
+ struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+ struct ib_fmr_attr fa = {
+ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
+ };
+ /* TBD we are perhaps overallocating here */
+ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+ r->r.fmr = ib_alloc_fmr(ia->ri_pd,
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
+ &fa);
+ if (IS_ERR(r->r.fmr)) {
+ rc = PTR_ERR(r->r.fmr);
+ dprintk("RPC: %s: ib_alloc_fmr"
+ " failed %i\n", __func__, rc);
+ goto out;
+ }
+ list_add(&r->mw_list, &buf->rb_mws);
+ ++r;
+ }
+ }
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ {
+ struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+ /* Allocate one extra request's worth, for full cycling */
+ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+ r->r.mw = ib_alloc_mw(ia->ri_pd);
+ if (IS_ERR(r->r.mw)) {
+ rc = PTR_ERR(r->r.mw);
+ dprintk("RPC: %s: ib_alloc_mw"
+ " failed %i\n", __func__, rc);
+ goto out;
+ }
+ list_add(&r->mw_list, &buf->rb_mws);
+ ++r;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Allocate/init the request/reply buffers. Doing this
+ * using kmalloc for now -- one for each buf.
+ */
+ for (i = 0; i < buf->rb_max_requests; i++) {
+ struct rpcrdma_req *req;
+ struct rpcrdma_rep *rep;
+
+ len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
+ /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
+ /* Typical ~2400b, so rounding up saves work later */
+ if (len < 4096)
+ len = 4096;
+ req = kmalloc(len, GFP_KERNEL);
+ if (req == NULL) {
+ dprintk("RPC: %s: request buffer %d alloc"
+ " failed\n", __func__, i);
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(req, 0, sizeof(struct rpcrdma_req));
+ buf->rb_send_bufs[i] = req;
+ buf->rb_send_bufs[i]->rl_buffer = buf;
+
+ rc = rpcrdma_register_internal(ia, req->rl_base,
+ len - offsetof(struct rpcrdma_req, rl_base),
+ &buf->rb_send_bufs[i]->rl_handle,
+ &buf->rb_send_bufs[i]->rl_iov);
+ if (rc)
+ goto out;
+
+ buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+
+ len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
+ rep = kmalloc(len, GFP_KERNEL);
+ if (rep == NULL) {
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, i);
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(rep, 0, sizeof(struct rpcrdma_rep));
+ buf->rb_recv_bufs[i] = rep;
+ buf->rb_recv_bufs[i]->rr_buffer = buf;
+ init_waitqueue_head(&rep->rr_unbind);
+
+ rc = rpcrdma_register_internal(ia, rep->rr_base,
+ len - offsetof(struct rpcrdma_rep, rr_base),
+ &buf->rb_recv_bufs[i]->rr_handle,
+ &buf->rb_recv_bufs[i]->rr_iov);
+ if (rc)
+ goto out;
+
+ }
+ dprintk("RPC: %s: max_requests %d\n",
+ __func__, buf->rb_max_requests);
+ /* done */
+ return 0;
+out:
+ rpcrdma_buffer_destroy(buf);
+ return rc;
+}
+
+/*
+ * Unregister and destroy buffer memory. Need to deal with
+ * partial initialization, so it's callable from failed create.
+ * Must be called before destroying endpoint, as registrations
+ * reference it.
+ */
+void
+rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
+{
+ int rc, i;
+ struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+
+ /* clean up in reverse order from create
+ * 1. recv mr memory (mr free, then kfree)
+ * 1a. bind mw memory
+ * 2. send mr memory (mr free, then kfree)
+ * 3. padding (if any) [moved to rpcrdma_ep_destroy]
+ * 4. arrays
+ */
+ dprintk("RPC: %s: entering\n", __func__);
+
+ for (i = 0; i < buf->rb_max_requests; i++) {
+ if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
+ rpcrdma_deregister_internal(ia,
+ buf->rb_recv_bufs[i]->rr_handle,
+ &buf->rb_recv_bufs[i]->rr_iov);
+ kfree(buf->rb_recv_bufs[i]);
+ }
+ if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
+ while (!list_empty(&buf->rb_mws)) {
+ struct rpcrdma_mw *r;
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_fmr"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ rc = ib_dealloc_mw(r->r.mw);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_mw"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ default:
+ break;
+ }
+ }
+ rpcrdma_deregister_internal(ia,
+ buf->rb_send_bufs[i]->rl_handle,
+ &buf->rb_send_bufs[i]->rl_iov);
+ kfree(buf->rb_send_bufs[i]);
+ }
+ }
+
+ kfree(buf->rb_pool);
+}
+
+/*
+ * Get a set of request/reply buffers.
+ *
+ * Reply buffer (if needed) is attached to send buffer upon return.
+ * Rule:
+ * rb_send_index and rb_recv_index MUST always be pointing to the
+ * *next* available buffer (non-NULL). They are incremented after
+ * removing buffers, and decremented *before* returning them.
+ */
+struct rpcrdma_req *
+rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
+{
+ struct rpcrdma_req *req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ if (buffers->rb_send_index == buffers->rb_max_requests) {
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ dprintk("RPC: %s: out of request buffers\n", __func__);
+ return ((struct rpcrdma_req *)NULL);
+ }
+
+ req = buffers->rb_send_bufs[buffers->rb_send_index];
+ if (buffers->rb_send_index < buffers->rb_recv_index) {
+ dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
+ __func__,
+ buffers->rb_recv_index - buffers->rb_send_index);
+ req->rl_reply = NULL;
+ } else {
+ req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+ buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+ }
+ buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
+ if (!list_empty(&buffers->rb_mws)) {
+ int i = RPCRDMA_MAX_SEGS - 1;
+ do {
+ struct rpcrdma_mw *r;
+ r = list_entry(buffers->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ req->rl_segments[i].mr_chunk.rl_mw = r;
+ } while (--i >= 0);
+ }
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ return req;
+}
+
+/*
+ * Put request/reply buffers back into pool.
+ * Pre-decrement counter/array index.
+ */
+void
+rpcrdma_buffer_put(struct rpcrdma_req *req)
+{
+ struct rpcrdma_buffer *buffers = req->rl_buffer;
+ struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
+ int i;
+ unsigned long flags;
+
+ BUG_ON(req->rl_nchunks != 0);
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ buffers->rb_send_bufs[--buffers->rb_send_index] = req;
+ req->rl_niovs = 0;
+ if (req->rl_reply) {
+ buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
+ init_waitqueue_head(&req->rl_reply->rr_unbind);
+ req->rl_reply->rr_func = NULL;
+ req->rl_reply = NULL;
+ }
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ /*
+ * Cycle mw's back in reverse order, and "spin" them.
+ * This delays and scrambles reuse as much as possible.
+ */
+ i = 1;
+ do {
+ struct rpcrdma_mw **mw;
+ mw = &req->rl_segments[i].mr_chunk.rl_mw;
+ list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
+ *mw = NULL;
+ } while (++i < RPCRDMA_MAX_SEGS);
+ list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
+ &buffers->rb_mws);
+ req->rl_segments[0].mr_chunk.rl_mw = NULL;
+ break;
+ default:
+ break;
+ }
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Recover reply buffers from pool.
+ * This happens when recovering from error conditions.
+ * Post-increment counter/array index.
+ */
+void
+rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
+{
+ struct rpcrdma_buffer *buffers = req->rl_buffer;
+ unsigned long flags;
+
+ if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
+ buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ if (buffers->rb_recv_index < buffers->rb_max_requests) {
+ req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+ buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+ }
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Put reply buffers back into pool when not attached to
+ * request. This happens in error conditions, and when
+ * aborting unbinds. Pre-decrement counter/array index.
+ */
+void
+rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buffers = rep->rr_buffer;
+ unsigned long flags;
+
+ rep->rr_func = NULL;
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Wrappers for internal-use kmalloc memory registration, used by buffer code.
+ */
+
+int
+rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
+ struct ib_mr **mrp, struct ib_sge *iov)
+{
+ struct ib_phys_buf ipb;
+ struct ib_mr *mr;
+ int rc;
+
+ /*
+ * All memory passed here was kmalloc'ed, therefore phys-contiguous.
+ */
+ iov->addr = ib_dma_map_single(ia->ri_id->device,
+ va, len, DMA_BIDIRECTIONAL);
+ iov->length = len;
+
+ if (ia->ri_bind_mem != NULL) {
+ *mrp = NULL;
+ iov->lkey = ia->ri_bind_mem->lkey;
+ return 0;
+ }
+
+ ipb.addr = iov->addr;
+ ipb.size = iov->length;
+ mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
+ IB_ACCESS_LOCAL_WRITE, &iov->addr);
+
+ dprintk("RPC: %s: phys convert: 0x%llx "
+ "registered 0x%llx length %d\n",
+ __func__, ipb.addr, iov->addr, len);
+
+ if (IS_ERR(mr)) {
+ *mrp = NULL;
+ rc = PTR_ERR(mr);
+ dprintk("RPC: %s: failed with %i\n", __func__, rc);
+ } else {
+ *mrp = mr;
+ iov->lkey = mr->lkey;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int
+rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
+ struct ib_mr *mr, struct ib_sge *iov)
+{
+ int rc;
+
+ ib_dma_unmap_single(ia->ri_id->device,
+ iov->addr, iov->length, DMA_BIDIRECTIONAL);
+
+ if (NULL == mr)
+ return 0;
+
+ rc = ib_dereg_mr(mr);
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
+ return rc;
+}
+
+/*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+static void
+rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
+{
+ seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ seg->mr_dmalen = seg->mr_len;
+ if (seg->mr_page)
+ seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
+ seg->mr_page, offset_in_page(seg->mr_offset),
+ seg->mr_dmalen, seg->mr_dir);
+ else
+ seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
+ seg->mr_offset,
+ seg->mr_dmalen, seg->mr_dir);
+}
+
+static void
+rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
+{
+ if (seg->mr_page)
+ ib_dma_unmap_page(ia->ri_id->device,
+ seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+ else
+ ib_dma_unmap_single(ia->ri_id->device,
+ seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+}
+
+int
+rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
+ int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+ IB_ACCESS_REMOTE_READ);
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int i;
+ int rc = 0;
+
+ switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+ rpcrdma_map_one(ia, seg, writing);
+ seg->mr_rkey = ia->ri_bind_mem->rkey;
+ seg->mr_base = seg->mr_dma;
+ seg->mr_nsegs = 1;
+ nsegs = 1;
+ break;
+#endif
+
+ /* Registration using fast memory registration */
+ case RPCRDMA_MTHCAFMR:
+ {
+ u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+ int len, pageoff = offset_in_page(seg->mr_offset);
+ seg1->mr_offset -= pageoff; /* start of page */
+ seg1->mr_len += pageoff;
+ len = -pageoff;
+ if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+ nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ physaddrs[i] = seg->mr_dma;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+ break;
+ }
+ nsegs = i;
+ rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+ physaddrs, nsegs, seg1->mr_dma);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_map_phys_fmr "
+ "%u@0x%llx+%i (%d)... status %i\n", __func__,
+ len, (unsigned long long)seg1->mr_dma,
+ pageoff, nsegs, rc);
+ while (nsegs--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+ seg1->mr_base = seg1->mr_dma + pageoff;
+ seg1->mr_nsegs = nsegs;
+ seg1->mr_len = len;
+ }
+ }
+ break;
+
+ /* Registration using memory windows */
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ {
+ struct ib_mw_bind param;
+ rpcrdma_map_one(ia, seg, writing);
+ param.mr = ia->ri_bind_mem;
+ param.wr_id = 0ULL; /* no send cookie */
+ param.addr = seg->mr_dma;
+ param.length = seg->mr_len;
+ param.send_flags = 0;
+ param.mw_access_flags = mem_priv;
+
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ rc = ib_bind_mw(ia->ri_id->qp,
+ seg->mr_chunk.rl_mw->r.mw, ¶m);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_bind_mw "
+ "%u@0x%llx status %i\n",
+ __func__, seg->mr_len,
+ (unsigned long long)seg->mr_dma, rc);
+ rpcrdma_unmap_one(ia, seg);
+ } else {
+ seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+ seg->mr_base = param.addr;
+ seg->mr_nsegs = 1;
+ nsegs = 1;
+ }
+ }
+ break;
+
+ /* Default registration each time */
+ default:
+ {
+ struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+ int len = 0;
+ if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+ nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ ipb[i].addr = seg->mr_dma;
+ ipb[i].size = seg->mr_len;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+ break;
+ }
+ nsegs = i;
+ seg1->mr_base = seg1->mr_dma;
+ seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+ ipb, nsegs, mem_priv, &seg1->mr_base);
+ if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+ rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+ dprintk("RPC: %s: failed ib_reg_phys_mr "
+ "%u@0x%llx (%d)... status %i\n",
+ __func__, len,
+ (unsigned long long)seg1->mr_dma, nsegs, rc);
+ while (nsegs--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+ seg1->mr_nsegs = nsegs;
+ seg1->mr_len = len;
+ }
+ }
+ break;
+ }
+ if (rc)
+ return -1;
+
+ return nsegs;
+}
+
+int
+rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_xprt *r_xprt, void *r)
+{
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int nsegs = seg->mr_nsegs, rc;
+
+ switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+ BUG_ON(nsegs != 1);
+ rpcrdma_unmap_one(ia, seg);
+ rc = 0;
+ break;
+#endif
+
+ case RPCRDMA_MTHCAFMR:
+ {
+ LIST_HEAD(l);
+ list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
+ rc = ib_unmap_fmr(&l);
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ }
+ if (rc)
+ dprintk("RPC: %s: failed ib_unmap_fmr,"
+ " status %i\n", __func__, rc);
+ break;
+
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ {
+ struct ib_mw_bind param;
+ BUG_ON(nsegs != 1);
+ param.mr = ia->ri_bind_mem;
+ param.addr = 0ULL; /* unbind */
+ param.length = 0;
+ param.mw_access_flags = 0;
+ if (r) {
+ param.wr_id = (u64) (unsigned long) r;
+ param.send_flags = IB_SEND_SIGNALED;
+ INIT_CQCOUNT(&r_xprt->rx_ep);
+ } else {
+ param.wr_id = 0ULL;
+ param.send_flags = 0;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ }
+ rc = ib_bind_mw(ia->ri_id->qp,
+ seg->mr_chunk.rl_mw->r.mw, ¶m);
+ rpcrdma_unmap_one(ia, seg);
+ }
+ if (rc)
+ dprintk("RPC: %s: failed ib_(un)bind_mw,"
+ " status %i\n", __func__, rc);
+ else
+ r = NULL; /* will upcall on completion */
+ break;
+
+ default:
+ rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+ seg1->mr_chunk.rl_mr = NULL;
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ if (rc)
+ dprintk("RPC: %s: failed ib_dereg_mr,"
+ " status %i\n", __func__, rc);
+ break;
+ }
+ if (r) {
+ struct rpcrdma_rep *rep = r;
+ void (*func)(struct rpcrdma_rep *) = rep->rr_func;
+ rep->rr_func = NULL;
+ func(rep); /* dereg done, callback now */
+ }
+ return nsegs;
+}
+
+/*
+ * Prepost any receive buffer, then post send.
+ *
+ * Receive buffer is donated to hardware, reclaimed upon recv completion.
+ */
+int
+rpcrdma_ep_post(struct rpcrdma_ia *ia,
+ struct rpcrdma_ep *ep,
+ struct rpcrdma_req *req)
+{
+ struct ib_send_wr send_wr, *send_wr_fail;
+ struct rpcrdma_rep *rep = req->rl_reply;
+ int rc;
+
+ if (rep) {
+ rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ if (rc)
+ goto out;
+ req->rl_reply = NULL;
+ }
+
+ send_wr.next = NULL;
+ send_wr.wr_id = 0ULL; /* no send cookie */
+ send_wr.sg_list = req->rl_send_iov;
+ send_wr.num_sge = req->rl_niovs;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.imm_data = 0;
+ if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
+ ib_dma_sync_single_for_device(ia->ri_id->device,
+ req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
+ DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_id->device,
+ req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
+ DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_id->device,
+ req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
+ DMA_TO_DEVICE);
+
+ if (DECR_CQCOUNT(ep) > 0)
+ send_wr.send_flags = 0;
+ else { /* Provider must take a send completion every now and then */
+ INIT_CQCOUNT(ep);
+ send_wr.send_flags = IB_SEND_SIGNALED;
+ }
+
+ rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+ if (rc)
+ dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
+ rc);
+out:
+ return rc;
+}
+
+/*
+ * (Re)post a receive buffer.
+ */
+int
+rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
+ struct rpcrdma_ep *ep,
+ struct rpcrdma_rep *rep)
+{
+ struct ib_recv_wr recv_wr, *recv_wr_fail;
+ int rc;
+
+ recv_wr.next = NULL;
+ recv_wr.wr_id = (u64) (unsigned long) rep;
+ recv_wr.sg_list = &rep->rr_iov;
+ recv_wr.num_sge = 1;
+
+ ib_dma_sync_single_for_cpu(ia->ri_id->device,
+ rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
+
+ DECR_CQCOUNT(ep);
+ rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
+
+ if (rc)
+ dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
+ rc);
+ return rc;
+}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
new file mode 100644
index 0000000..2427822
--- /dev/null
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
+#define _LINUX_SUNRPC_XPRT_RDMA_H
+
+#include <linux/wait.h> /* wait_queue_head_t, etc */
+#include <linux/spinlock.h> /* spinlock_t, etc */
+#include <asm/atomic.h> /* atomic_t, etc */
+
+#include <rdma/rdma_cm.h> /* RDMA connection api */
+#include <rdma/ib_verbs.h> /* RDMA verbs api */
+
+#include <linux/sunrpc/clnt.h> /* rpc_xprt */
+#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
+#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+
+/*
+ * Interface Adapter -- one per transport instance
+ */
+struct rpcrdma_ia {
+ struct rdma_cm_id *ri_id;
+ struct ib_pd *ri_pd;
+ struct ib_mr *ri_bind_mem;
+ struct completion ri_done;
+ int ri_async_rc;
+ enum rpcrdma_memreg ri_memreg_strategy;
+};
+
+/*
+ * RDMA Endpoint -- one per transport instance
+ */
+
+struct rpcrdma_ep {
+ atomic_t rep_cqcount;
+ int rep_cqinit;
+ int rep_connected;
+ struct rpcrdma_ia *rep_ia;
+ struct ib_cq *rep_cq;
+ struct ib_qp_init_attr rep_attr;
+ wait_queue_head_t rep_connect_wait;
+ struct ib_sge rep_pad; /* holds zeroed pad */
+ struct ib_mr *rep_pad_mr; /* holds zeroed pad */
+ void (*rep_func)(struct rpcrdma_ep *);
+ struct rpc_xprt *rep_xprt; /* for rep_func */
+ struct rdma_conn_param rep_remote_cma;
+ struct sockaddr_storage rep_remote_addr;
+};
+
+#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
+#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+
+/*
+ * struct rpcrdma_rep -- this structure encapsulates state required to recv
+ * and complete a reply, asychronously. It needs several pieces of
+ * state:
+ * o recv buffer (posted to provider)
+ * o ib_sge (also donated to provider)
+ * o status of reply (length, success or not)
+ * o bookkeeping state to get run by tasklet (list, etc)
+ *
+ * These are allocated during initialization, per-transport instance;
+ * however, the tasklet execution list itself is global, as it should
+ * always be pretty short.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ */
+
+/* temporary static scatter/gather max */
+#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */
+#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
+#define MAX_RPCRDMAHDR (\
+ /* max supported RPC/RDMA header */ \
+ sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
+ (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
+
+struct rpcrdma_buffer;
+
+struct rpcrdma_rep {
+ unsigned int rr_len; /* actual received reply length */
+ struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
+ struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
+ void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
+ struct list_head rr_list; /* tasklet list */
+ wait_queue_head_t rr_unbind; /* optional unbind wait */
+ struct ib_sge rr_iov; /* for posting */
+ struct ib_mr *rr_handle; /* handle for mem in rr_iov */
+ char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
+};
+
+/*
+ * struct rpcrdma_req -- structure central to the request/reply sequence.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ *
+ * It includes pre-registered buffer memory for send AND recv.
+ * The recv buffer, however, is not owned by this structure, and
+ * is "donated" to the hardware when a recv is posted. When a
+ * reply is handled, the recv buffer used is given back to the
+ * struct rpcrdma_req associated with the request.
+ *
+ * In addition to the basic memory, this structure includes an array
+ * of iovs for send operations. The reason is that the iovs passed to
+ * ib_post_{send,recv} must not be modified until the work request
+ * completes.
+ *
+ * NOTES:
+ * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we
+ * marshal. The number needed varies depending on the iov lists that
+ * are passed to us, the memory registration mode we are in, and if
+ * physical addressing is used, the layout.
+ */
+
+struct rpcrdma_mr_seg { /* chunk descriptors */
+ union { /* chunk memory handles */
+ struct ib_mr *rl_mr; /* if registered directly */
+ struct rpcrdma_mw { /* if registered from region */
+ union {
+ struct ib_mw *mw;
+ struct ib_fmr *fmr;
+ } r;
+ struct list_head mw_list;
+ } *rl_mw;
+ } mr_chunk;
+ u64 mr_base; /* registration result */
+ u32 mr_rkey; /* registration result */
+ u32 mr_len; /* length of chunk or segment */
+ int mr_nsegs; /* number of segments in chunk or 0 */
+ enum dma_data_direction mr_dir; /* segment mapping direction */
+ dma_addr_t mr_dma; /* segment mapping address */
+ size_t mr_dmalen; /* segment mapping length */
+ struct page *mr_page; /* owning page, if any */
+ char *mr_offset; /* kva if no page, else offset */
+};
+
+struct rpcrdma_req {
+ size_t rl_size; /* actual length of buffer */
+ unsigned int rl_niovs; /* 0, 2 or 4 */
+ unsigned int rl_nchunks; /* non-zero if chunks */
+ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
+ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
+ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
+ struct ib_sge rl_send_iov[4]; /* for active requests */
+ struct ib_sge rl_iov; /* for posting */
+ struct ib_mr *rl_handle; /* handle for mem in rl_iov */
+ char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
+ __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
+};
+#define rpcr_to_rdmar(r) \
+ container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
+
+/*
+ * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
+ * inline requests/replies, and client/server credits.
+ *
+ * One of these is associated with a transport instance
+ */
+struct rpcrdma_buffer {
+ spinlock_t rb_lock; /* protects indexes */
+ atomic_t rb_credits; /* most recent server credits */
+ unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
+ int rb_max_requests;/* client max requests */
+ struct list_head rb_mws; /* optional memory windows/fmrs */
+ int rb_send_index;
+ struct rpcrdma_req **rb_send_bufs;
+ int rb_recv_index;
+ struct rpcrdma_rep **rb_recv_bufs;
+ char *rb_pool;
+};
+#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
+
+/*
+ * Internal structure for transport instance creation. This
+ * exists primarily for modularity.
+ *
+ * This data should be set with mount options
+ */
+struct rpcrdma_create_data_internal {
+ struct sockaddr_storage addr; /* RDMA server address */
+ unsigned int max_requests; /* max requests (slots) in flight */
+ unsigned int rsize; /* mount rsize - max read hdr+data */
+ unsigned int wsize; /* mount wsize - max write hdr+data */
+ unsigned int inline_rsize; /* max non-rdma read data payload */
+ unsigned int inline_wsize; /* max non-rdma write data payload */
+ unsigned int padding; /* non-rdma write header padding */
+};
+
+#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
+ (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize)
+
+#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
+ (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize)
+
+#define RPCRDMA_INLINE_PAD_VALUE(rq)\
+ rpcx_to_rdmad(rq->rq_task->tk_xprt).padding
+
+/*
+ * Statistics for RPCRDMA
+ */
+struct rpcrdma_stats {
+ unsigned long read_chunk_count;
+ unsigned long write_chunk_count;
+ unsigned long reply_chunk_count;
+
+ unsigned long long total_rdma_request;
+ unsigned long long total_rdma_reply;
+
+ unsigned long long pullup_copy_count;
+ unsigned long long fixup_copy_count;
+ unsigned long hardway_register_count;
+ unsigned long failed_marshal_count;
+ unsigned long bad_reply_count;
+};
+
+/*
+ * RPCRDMA transport -- encapsulates the structures above for
+ * integration with RPC.
+ *
+ * The contained structures are embedded, not pointers,
+ * for convenience. This structure need not be visible externally.
+ *
+ * It is allocated and initialized during mount, and released
+ * during unmount.
+ */
+struct rpcrdma_xprt {
+ struct rpc_xprt xprt;
+ struct rpcrdma_ia rx_ia;
+ struct rpcrdma_ep rx_ep;
+ struct rpcrdma_buffer rx_buf;
+ struct rpcrdma_create_data_internal rx_data;
+ struct delayed_work rdma_connect;
+ struct rpcrdma_stats rx_stats;
+};
+
+#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
+#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+
+/*
+ * Interface Adapter calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+void rpcrdma_ia_close(struct rpcrdma_ia *);
+
+/*
+ * Endpoint calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
+ struct rpcrdma_create_data_internal *);
+int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+
+int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
+ struct rpcrdma_req *);
+int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
+ struct rpcrdma_rep *);
+
+/*
+ * Buffer calls - xprtrdma/verbs.c
+ */
+int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
+ struct rpcrdma_ia *,
+ struct rpcrdma_create_data_internal *);
+void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+
+struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
+void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+
+int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
+ struct ib_mr **, struct ib_sge *);
+int rpcrdma_deregister_internal(struct rpcrdma_ia *,
+ struct ib_mr *, struct ib_sge *);
+
+int rpcrdma_register_external(struct rpcrdma_mr_seg *,
+ int, int, struct rpcrdma_xprt *);
+int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
+ struct rpcrdma_xprt *, void *);
+
+/*
+ * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
+ */
+void rpcrdma_conn_func(struct rpcrdma_ep *);
+void rpcrdma_reply_handler(struct rpcrdma_rep *);
+
+/*
+ * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
+ */
+int rpcrdma_marshal_req(struct rpc_rqst *);
+
+#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 282efd4..02298f5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -13,10 +13,14 @@
* (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
*
* IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
+ * <gilles.quillard@bull.net>
*/
#include <linux/types.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/capability.h>
#include <linux/pagemap.h>
#include <linux/errno.h>
@@ -28,6 +32,7 @@
#include <linux/tcp.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
#include <linux/file.h>
#include <net/sock.h>
@@ -260,14 +265,29 @@
#define TCP_RCV_COPY_XID (1UL << 2)
#define TCP_RCV_COPY_DATA (1UL << 3)
-static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
{
- struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+ return (struct sockaddr *) &xprt->addr;
+}
+
+static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
+{
+ return (struct sockaddr_in *) &xprt->addr;
+}
+
+static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
+{
+ return (struct sockaddr_in6 *) &xprt->addr;
+}
+
+static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in *addr = xs_addr_in(xprt);
char *buf;
buf = kzalloc(20, GFP_KERNEL);
if (buf) {
- snprintf(buf, 20, "%u.%u.%u.%u",
+ snprintf(buf, 20, NIPQUAD_FMT,
NIPQUAD(addr->sin_addr.s_addr));
}
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
@@ -279,26 +299,123 @@
}
xprt->address_strings[RPC_DISPLAY_PORT] = buf;
- if (xprt->prot == IPPROTO_UDP)
- xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
- else
- xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ if (xprt->prot == IPPROTO_UDP)
+ snprintf(buf, 8, "udp");
+ else
+ snprintf(buf, 8, "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
buf = kzalloc(48, GFP_KERNEL);
if (buf) {
- snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+ snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
NIPQUAD(addr->sin_addr.s_addr),
ntohs(addr->sin_port),
xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
}
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+ buf = kzalloc(10, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 10, "%02x%02x%02x%02x",
+ NIPQUAD(addr->sin_addr.s_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%4hx",
+ ntohs(addr->sin_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+ buf = kzalloc(30, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port) >> 8,
+ ntohs(addr->sin_port) & 0xff);
+ }
+ xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+ xprt->address_strings[RPC_DISPLAY_NETID] =
+ kstrdup(xprt->prot == IPPROTO_UDP ?
+ RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
+}
+
+static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in6 *addr = xs_addr_in6(xprt);
+ char *buf;
+
+ buf = kzalloc(40, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 40, NIP6_FMT,
+ NIP6(addr->sin6_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%u",
+ ntohs(addr->sin6_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ if (xprt->prot == IPPROTO_UDP)
+ snprintf(buf, 8, "udp");
+ else
+ snprintf(buf, 8, "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
+
+ buf = kzalloc(64, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
+ NIP6(addr->sin6_addr),
+ ntohs(addr->sin6_port),
+ xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+ buf = kzalloc(36, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 36, NIP6_SEQFMT,
+ NIP6(addr->sin6_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%4hx",
+ ntohs(addr->sin6_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+ buf = kzalloc(50, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 50, NIP6_FMT".%u.%u",
+ NIP6(addr->sin6_addr),
+ ntohs(addr->sin6_port) >> 8,
+ ntohs(addr->sin6_port) & 0xff);
+ }
+ xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+ xprt->address_strings[RPC_DISPLAY_NETID] =
+ kstrdup(xprt->prot == IPPROTO_UDP ?
+ RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
}
static void xs_free_peer_addresses(struct rpc_xprt *xprt)
{
- kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
- kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
- kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+ int i;
+
+ for (i = 0; i < RPC_DISPLAY_MAX; i++)
+ kfree(xprt->address_strings[i]);
}
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
@@ -463,19 +580,20 @@
req->rq_xtime = jiffies;
status = xs_sendpages(transport->sock,
- (struct sockaddr *) &xprt->addr,
+ xs_addr(xprt),
xprt->addrlen, xdr,
req->rq_bytes_sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (likely(status >= (int) req->rq_slen))
- return 0;
-
- /* Still some bytes left; set up for a retry later. */
- if (status > 0)
+ if (status >= 0) {
+ task->tk_bytes_sent += status;
+ if (status >= req->rq_slen)
+ return 0;
+ /* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
+ }
switch (status) {
case -ENETUNREACH:
@@ -523,7 +641,8 @@
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
- int status, retry = 0;
+ int status;
+ unsigned int retry = 0;
xs_encode_tcp_record_marker(&req->rq_snd_buf);
@@ -661,6 +780,7 @@
xs_free_peer_addresses(xprt);
kfree(xprt->slot);
kfree(xprt);
+ module_put(THIS_MODULE);
}
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -1139,14 +1259,23 @@
*/
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
{
- struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+ struct sockaddr *addr = xs_addr(xprt);
dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);
- sap->sin_port = htons(port);
+ switch (addr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+ break;
+ default:
+ BUG();
+ }
}
-static int xs_bind(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
{
struct sockaddr_in myaddr = {
.sin_family = AF_INET,
@@ -1174,8 +1303,42 @@
else
port--;
} while (err == -EADDRINUSE && port != transport->port);
- dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
- NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
+ dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n",
+ __FUNCTION__, NIPQUAD(myaddr.sin_addr),
+ port, err ? "failed" : "ok", err);
+ return err;
+}
+
+static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
+{
+ struct sockaddr_in6 myaddr = {
+ .sin6_family = AF_INET6,
+ };
+ struct sockaddr_in6 *sa;
+ int err;
+ unsigned short port = transport->port;
+
+ if (!transport->xprt.resvport)
+ port = 0;
+ sa = (struct sockaddr_in6 *)&transport->addr;
+ myaddr.sin6_addr = sa->sin6_addr;
+ do {
+ myaddr.sin6_port = htons(port);
+ err = kernel_bind(sock, (struct sockaddr *) &myaddr,
+ sizeof(myaddr));
+ if (!transport->xprt.resvport)
+ break;
+ if (err == 0) {
+ transport->port = port;
+ break;
+ }
+ if (port <= xprt_min_resvport)
+ port = xprt_max_resvport;
+ else
+ port--;
+ } while (err == -EADDRINUSE && port != transport->port);
+ dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
+ NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
return err;
}
@@ -1183,64 +1346,36 @@
static struct lock_class_key xs_key[2];
static struct lock_class_key xs_slock_key[2];
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
{
struct sock *sk = sock->sk;
+
BUG_ON(sock_owned_by_user(sk));
- switch (sk->sk_family) {
- case AF_INET:
- sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
- &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
- break;
+ sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
+ &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
+}
- case AF_INET6:
- sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
- &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
- break;
+static inline void xs_reclassify_socket6(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
- default:
- BUG();
- }
+ BUG_ON(sock_owned_by_user(sk));
+ sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
+ &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
}
#else
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
+{
+}
+
+static inline void xs_reclassify_socket6(struct socket *sock)
{
}
#endif
-/**
- * xs_udp_connect_worker - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker(struct work_struct *work)
+static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
- int err, status = -EIO;
-
- if (xprt->shutdown || !xprt_bound(xprt))
- goto out;
-
- /* Start by resetting any existing state */
- xs_close(xprt);
-
- if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
- dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
- goto out;
- }
- xs_reclassify_socket(sock);
-
- if (xs_bind(transport, sock)) {
- sock_release(sock);
- goto out;
- }
-
- dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
if (!transport->inet) {
struct sock *sk = sock->sk;
@@ -1265,6 +1400,84 @@
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
+}
+
+/**
+ * xs_udp_connect_worker4 - set up a UDP socket
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_udp_connect_worker4(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
+ int err, status = -EIO;
+
+ if (xprt->shutdown || !xprt_bound(xprt))
+ goto out;
+
+ /* Start by resetting any existing state */
+ xs_close(xprt);
+
+ if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
+ goto out;
+ }
+ xs_reclassify_socket4(sock);
+
+ if (xs_bind4(transport, sock)) {
+ sock_release(sock);
+ goto out;
+ }
+
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+ xs_udp_finish_connecting(xprt, sock);
+ status = 0;
+out:
+ xprt_wake_pending_tasks(xprt, status);
+ xprt_clear_connecting(xprt);
+}
+
+/**
+ * xs_udp_connect_worker6 - set up a UDP socket
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_udp_connect_worker6(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
+ int err, status = -EIO;
+
+ if (xprt->shutdown || !xprt_bound(xprt))
+ goto out;
+
+ /* Start by resetting any existing state */
+ xs_close(xprt);
+
+ if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
+ goto out;
+ }
+ xs_reclassify_socket6(sock);
+
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out;
+ }
+
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+ xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
xprt_wake_pending_tasks(xprt, status);
@@ -1295,42 +1508,9 @@
result);
}
-/**
- * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker(struct work_struct *work)
+static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
- int err, status = -EIO;
-
- if (xprt->shutdown || !xprt_bound(xprt))
- goto out;
-
- if (!sock) {
- /* start from scratch */
- if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport "
- "socket (%d).\n", -err);
- goto out;
- }
- xs_reclassify_socket(sock);
-
- if (xs_bind(transport, sock)) {
- sock_release(sock);
- goto out;
- }
- } else
- /* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt);
-
- dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
if (!transport->inet) {
struct sock *sk = sock->sk;
@@ -1364,8 +1544,46 @@
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
- status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
- xprt->addrlen, O_NONBLOCK);
+ return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+}
+
+/**
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker4(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
+ int err, status = -EIO;
+
+ if (xprt->shutdown || !xprt_bound(xprt))
+ goto out;
+
+ if (!sock) {
+ /* start from scratch */
+ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
+ goto out;
+ }
+ xs_reclassify_socket4(sock);
+
+ if (xs_bind4(transport, sock) < 0) {
+ sock_release(sock);
+ goto out;
+ }
+ } else
+ /* "close" the socket, preserving the local port */
+ xs_tcp_reuse_connection(xprt);
+
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+ status = xs_tcp_finish_connecting(xprt, sock);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
@@ -1391,6 +1609,66 @@
}
/**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
+ int err, status = -EIO;
+
+ if (xprt->shutdown || !xprt_bound(xprt))
+ goto out;
+
+ if (!sock) {
+ /* start from scratch */
+ if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
+ goto out;
+ }
+ xs_reclassify_socket6(sock);
+
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out;
+ }
+ } else
+ /* "close" the socket, preserving the local port */
+ xs_tcp_reuse_connection(xprt);
+
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+ status = xs_tcp_finish_connecting(xprt, sock);
+ dprintk("RPC: %p connect status %d connected %d sock state %d\n",
+ xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
+ if (status < 0) {
+ switch (status) {
+ case -EINPROGRESS:
+ case -EALREADY:
+ goto out_clear;
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ /* retry with existing socket, after a delay */
+ break;
+ default:
+ /* get rid of existing socket, and retry */
+ xs_close(xprt);
+ break;
+ }
+ }
+out:
+ xprt_wake_pending_tasks(xprt, status);
+out_clear:
+ xprt_clear_connecting(xprt);
+}
+
+/**
* xs_connect - connect a socket to a remote endpoint
* @task: address of RPC task that manages state of connect request
*
@@ -1508,7 +1786,8 @@
.print_stats = xs_tcp_print_stats,
};
-static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
+static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
+ unsigned int slot_table_size)
{
struct rpc_xprt *xprt;
struct sock_xprt *new;
@@ -1549,8 +1828,9 @@
* @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
{
+ struct sockaddr *addr = args->dstaddr;
struct rpc_xprt *xprt;
struct sock_xprt *transport;
@@ -1559,15 +1839,11 @@
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
- xprt_set_bound(xprt);
-
xprt->prot = IPPROTO_UDP;
xprt->tsh_size = 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
- INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
xprt->bind_timeout = XS_BIND_TO;
xprt->connect_timeout = XS_UDP_CONN_TO;
xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1580,11 +1856,37 @@
else
xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
- xs_format_peer_addresses(xprt);
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_udp_connect_worker4);
+ xs_format_ipv4_peer_addresses(xprt);
+ break;
+ case AF_INET6:
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_udp_connect_worker6);
+ xs_format_ipv6_peer_addresses(xprt);
+ break;
+ default:
+ kfree(xprt);
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
dprintk("RPC: set up transport to address %s\n",
xprt->address_strings[RPC_DISPLAY_ALL]);
- return xprt;
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+
+ kfree(xprt->slot);
+ kfree(xprt);
+ return ERR_PTR(-EINVAL);
}
/**
@@ -1592,8 +1894,9 @@
* @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
{
+ struct sockaddr *addr = args->dstaddr;
struct rpc_xprt *xprt;
struct sock_xprt *transport;
@@ -1602,14 +1905,10 @@
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
- xprt_set_bound(xprt);
-
xprt->prot = IPPROTO_TCP;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
- INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
xprt->bind_timeout = XS_BIND_TO;
xprt->connect_timeout = XS_TCP_CONN_TO;
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
@@ -1622,15 +1921,55 @@
else
xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
- xs_format_peer_addresses(xprt);
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
+ xs_format_ipv4_peer_addresses(xprt);
+ break;
+ case AF_INET6:
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
+ xs_format_ipv6_peer_addresses(xprt);
+ break;
+ default:
+ kfree(xprt);
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
dprintk("RPC: set up transport to address %s\n",
xprt->address_strings[RPC_DISPLAY_ALL]);
- return xprt;
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+
+ kfree(xprt->slot);
+ kfree(xprt);
+ return ERR_PTR(-EINVAL);
}
+static struct xprt_class xs_udp_transport = {
+ .list = LIST_HEAD_INIT(xs_udp_transport.list),
+ .name = "udp",
+ .owner = THIS_MODULE,
+ .ident = IPPROTO_UDP,
+ .setup = xs_setup_udp,
+};
+
+static struct xprt_class xs_tcp_transport = {
+ .list = LIST_HEAD_INIT(xs_tcp_transport.list),
+ .name = "tcp",
+ .owner = THIS_MODULE,
+ .ident = IPPROTO_TCP,
+ .setup = xs_setup_tcp,
+};
+
/**
- * init_socket_xprt - set up xprtsock's sysctls
+ * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
*
*/
int init_socket_xprt(void)
@@ -1640,11 +1979,14 @@
sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif
+ xprt_register_transport(&xs_udp_transport);
+ xprt_register_transport(&xs_tcp_transport);
+
return 0;
}
/**
- * cleanup_socket_xprt - remove xprtsock's sysctls
+ * cleanup_socket_xprt - remove xprtsock's sysctls, unregister
*
*/
void cleanup_socket_xprt(void)
@@ -1655,4 +1997,7 @@
sunrpc_table_header = NULL;
}
#endif
+
+ xprt_unregister_transport(&xs_udp_transport);
+ xprt_unregister_transport(&xs_tcp_transport);
}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3c3fff3..cf76150 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3932,7 +3932,7 @@
}
static unsigned int selinux_ip_postroute_last(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *),
@@ -3941,7 +3941,6 @@
char *addrp;
int len, err = 0;
struct sock *sk;
- struct sk_buff *skb = *pskb;
struct avc_audit_data ad;
struct net_device *dev = (struct net_device *)out;
struct sk_security_struct *sksec;
@@ -3977,23 +3976,23 @@
}
static unsigned int selinux_ipv4_postroute_last(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET);
+ return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET);
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static unsigned int selinux_ipv6_postroute_last(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6);
+ return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET6);
}
#endif /* IPV6 */