Merge branch 'next/dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc

* 'next/dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc: (21 commits)
  arm/dt: tegra devicetree support
  arm/versatile: Add device tree support
  dt/irq: add irq_domain_generate_simple() helper
  irq: add irq_domain translation infrastructure
  dmaengine: imx-sdma: add device tree probe support
  dmaengine: imx-sdma: sdma_get_firmware does not need to copy fw_name
  dmaengine: imx-sdma: use platform_device_id to identify sdma version
  mmc: sdhci-esdhc-imx: add device tree probe support
  mmc: sdhci-pltfm: dt device does not pass parent to sdhci_alloc_host
  mmc: sdhci-esdhc-imx: get rid of the uses of cpu_is_mx()
  mmc: sdhci-esdhc-imx: do not reference platform data after probe
  mmc: sdhci-esdhc-imx: extend card_detect and write_protect support for mx5
  net/fec: add device tree probe support
  net: ibm_newemac: convert it to use of_get_phy_mode
  dt/net: add helper function of_get_phy_mode
  net/fec: gasket needs to be enabled for some i.mx
  serial/imx: add device tree probe support
  serial/imx: get rid of the uses of cpu_is_mx1()
  arm/dt: Add dtb make rule
  arm/dt: Add skeleton dtsi file
  ...
diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
new file mode 100644
index 0000000..7190c99
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
@@ -0,0 +1,36 @@
+Device-Tree bindings for input/gpio_keys.c keyboard driver
+
+Required properties:
+	- compatible = "gpio-keys";
+
+Optional properties:
+	- autorepeat: Boolean, enable the auto repeat feature of the Linux
+	  input subsystem.
+
+Each button (key) is represented as a sub-node of "gpio-keys":
+Subnode properties:
+
+	- gpios: OF device-tree gpio specification.
+	- label: Descriptive name of the key.
+	- linux,code: Keycode to emit.
+
+Optional subnode-properties:
+	- linux,input-type: Specify the event type this button/key generates.
+	  If not specified, defaults to <1> == EV_KEY.
+	- debounce-interval: Debouncing interval time in milliseconds.
+	  If not specified, defaults to 5.
+	- gpio-key,wakeup: Boolean, button can wake up the system.
+
+Example nodes:
+
+	gpio_keys {
+		compatible = "gpio-keys";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		autorepeat;
+		button@21 {
+			label = "GPIO Key UP";
+			linux,code = <103>;
+			gpios = <&gpio1 0 1>;
+		};
+		...
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
new file mode 100644
index 0000000..2144af1
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
@@ -0,0 +1,14 @@
+* Freescale i.MX Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "fsl,<soc>-wdt"
+- reg : Should contain WDT registers location and length
+- interrupts : Should contain WDT interrupt
+
+Examples:
+
+wdt@73f98000 {
+	compatible = "fsl,imx51-wdt", "fsl,imx21-wdt";
+	reg = <0x73f98000 0x4000>;
+	interrupts = <58>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
new file mode 100644
index 0000000..79ead82
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
@@ -0,0 +1,11 @@
+* Samsung's Watchdog Timer Controller
+
+Samsung's Watchdog controller is used for resuming system operation
+after a preset amount of time during which the WDT reset event has not
+occurred.
+
+Required properties:
+- compatible : should be "samsung,s3c2410-wdt"
+- reg : base physical address of the controller and length of memory mapped
+	region.
+- interrupts : interrupt number to the CPU.
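+
+Example (illustrative values only - the unit address, register window size
+and interrupt number are hypothetical and depend on the SoC):
+
+wdt@7e004000 {
+	compatible = "samsung,s3c2410-wdt";
+	reg = <0x7e004000 0x400>;
+	interrupts = <26>;
+};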
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
index 87019d2..09994c2 100644
--- a/Documentation/filesystems/nfs/Exporting
+++ b/Documentation/filesystems/nfs/Exporting
@@ -92,7 +92,14 @@
    1/ provide the filehandle fragment routines described below.
    2/ make sure that d_splice_alias is used rather than d_add
       when ->lookup finds an inode for a given parent and name.
-      Typically the ->lookup routine will end with a:
+
+      If inode is NULL, d_splice_alias(inode, dentry) is equivalent to
+
+		d_add(dentry, inode), NULL
+
+      Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err)
+
+      Typically the ->lookup routine will simply end with a:
 
 		return d_splice_alias(inode, dentry);
 	}
diff --git a/Documentation/md.txt b/Documentation/md.txt
index f0eee83..fc94770 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -360,18 +360,20 @@
         A file recording the current state of the device in the array
 	which can be a comma separated list of
 	      faulty   - device has been kicked from active use due to
-                         a detected fault
+                         a detected fault or it has unacknowledged bad
+                         blocks
 	      in_sync  - device is a fully in-sync member of the array
 	      writemostly - device will only be subject to read
 		         requests if there are no other options.
 			 This applies only to raid1 arrays.
-	      blocked  - device has failed, metadata is "external",
-	                 and the failure hasn't been acknowledged yet.
+	      blocked  - device has failed, and the failure hasn't been
+			 acknowledged yet by the metadata handler.
 			 Writes that would write to this device if
 			 it were not faulty are blocked.
 	      spare    - device is working, but not a full member.
 			 This includes spares that are in the process
 			 of being recovered to
+	      write_error - device has ever seen a write error.
 	This list may grow in future.
 	This can be written to.
 	Writing "faulty"  simulates a failure on the device.
@@ -379,9 +381,11 @@
 	Writing "writemostly" sets the writemostly flag.
 	Writing "-writemostly" clears the writemostly flag.
 	Writing "blocked" sets the "blocked" flag.
-	Writing "-blocked" clears the "blocked" flag and allows writes
-		to complete.
+	Writing "-blocked" clears the "blocked" flags and allows writes
+		to complete and possibly simulates an error.
 	Writing "in_sync" sets the in_sync flag.
+	Writing "write_error" sets writeerrorseen flag.
+	Writing "-write_error" clears writeerrorseen flag.
 
 	This file responds to select/poll. Any change to 'faulty'
 	or 'blocked' causes an event.
@@ -419,7 +423,6 @@
         written, it will be rejected.
 
       recovery_start
-
         When the device is not 'in_sync', this records the number of
 	sectors from the start of the device which are known to be
 	correct.  This is normally zero, but during a recovery
@@ -435,6 +438,20 @@
 	Setting this to 'none' is equivalent to setting 'in_sync'.
 	Setting to any other value also clears the 'in_sync' flag.
 	
+      bad_blocks
+	This gives the list of all known bad blocks in the form of
+	start address and length (both in sectors). If the output
+	is too big to fit in a page, it will be truncated. Writing
+	"sector length" to this file adds new acknowledged (i.e.
+	safely recorded to disk) bad blocks.
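+
+	For example, an eight-sector bad range starting at sector 2048
+	could be recorded by writing (the device directory name is
+	illustrative):
+
+	    $ echo "2048 8" > /sys/block/md0/md/dev-sdb1/bad_blocks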
+
+      unacknowledged_bad_blocks
+	This gives the list of known-but-not-yet-saved-to-disk bad
+	blocks in the same form as 'bad_blocks'. If the output is too big
+	to fit in a page, it will be truncated. Writing to this file
+	adds bad blocks without acknowledging them. This is largely
+	for testing.
+
 
 
 An active md device will also contain and entry for each active device
diff --git a/Documentation/security/keys-ecryptfs.txt b/Documentation/security/keys-ecryptfs.txt
new file mode 100644
index 0000000..c3bbeba
--- /dev/null
+++ b/Documentation/security/keys-ecryptfs.txt
@@ -0,0 +1,68 @@
+		Encrypted keys for the eCryptfs filesystem
+
+ECryptfs is a stacked filesystem which transparently encrypts and decrypts each
+file using a randomly generated File Encryption Key (FEK).
+
+Each FEK is in turn encrypted with a File Encryption Key Encryption Key (FEFEK)
+either in kernel space or in user space with a daemon called 'ecryptfsd'.  In
+the former case the operation is performed directly by the kernel CryptoAPI
+using a key, the FEFEK, derived from a user prompted passphrase;  in the latter
+the FEK is encrypted by 'ecryptfsd' with the help of external libraries in order
+to support other mechanisms like public key cryptography, PKCS#11 and TPM based
+operations.
+
+The data structure defined by eCryptfs to contain information required for the
+FEK decryption is called authentication token and, currently, can be stored in a
+kernel key of the 'user' type, inserted in the user's session specific keyring
+by the userspace utility 'mount.ecryptfs' shipped with the package
+'ecryptfs-utils'.
+
+The 'encrypted' key type has been extended with the introduction of the new
+format 'ecryptfs' in order to be used in conjunction with the eCryptfs
+filesystem.  Encrypted keys of the newly introduced format store an
+authentication token in their payload with a FEFEK randomly generated by the
+kernel and protected by the parent master key.
+
+In order to avoid known-plaintext attacks, the datablob obtained through
+commands 'keyctl print' or 'keyctl pipe' does not contain the overall
+authentication token, whose content is well known, but only the FEFEK in
+encrypted form.
+
+The eCryptfs filesystem may really benefit from using encrypted keys in that the
+required key can be securely generated by an Administrator and provided at boot
+time after the unsealing of a 'trusted' key in order to perform the mount in a
+controlled environment.  Another advantage is that the key is not exposed to
+threats of malicious software, because it is available in clear form only at
+kernel level.
+
+Usage:
+   keyctl add encrypted name "new ecryptfs key-type:master-key-name keylen" ring
+   keyctl add encrypted name "load hex_blob" ring
+   keyctl update keyid "update key-type:master-key-name"
+
+name:= '<16 hexadecimal characters>'
+key-type:= 'trusted' | 'user'
+keylen:= 64
+
+
+Example of encrypted key usage with the eCryptfs filesystem:
+
+Create an encrypted key "1000100010001000" of length 64 bytes with format
+'ecryptfs' and save it using a previously loaded user key "test":
+
+    $ keyctl add encrypted 1000100010001000 "new ecryptfs user:test 64" @u
+    19184530
+
+    $ keyctl print 19184530
+    ecryptfs user:test 64 490045d4bfe48c99f0d465fbbbb79e7500da954178e2de0697
+    dd85091f5450a0511219e9f7cd70dcd498038181466f78ac8d4c19504fcc72402bfc41c2
+    f253a41b7507ccaa4b2b03fff19a69d1cc0b16e71746473f023a95488b6edfd86f7fdd40
+    9d292e4bacded1258880122dd553a661
+
+    $ keyctl pipe 19184530 > ecryptfs.blob
+
+Mount an eCryptfs filesystem using the created encrypted key "1000100010001000"
+into the '/secret' directory:
+
+    $ mount -i -t ecryptfs -oecryptfs_sig=1000100010001000,\
+      ecryptfs_cipher=aes,ecryptfs_key_bytes=32 /secret /secret
diff --git a/Documentation/security/keys-trusted-encrypted.txt b/Documentation/security/keys-trusted-encrypted.txt
index 8fb79bc..5f50cca 100644
--- a/Documentation/security/keys-trusted-encrypted.txt
+++ b/Documentation/security/keys-trusted-encrypted.txt
@@ -53,12 +53,19 @@
 should therefore be loaded in as secure a way as possible, preferably early in
 boot.
 
-Usage:
-  keyctl add encrypted name "new key-type:master-key-name keylen" ring
-  keyctl add encrypted name "load hex_blob" ring
-  keyctl update keyid "update key-type:master-key-name"
+The decrypted portion of encrypted keys can contain either a simple symmetric
+key or a more complex structure. The format of the more complex structure is
+application specific and is identified by 'format'.
 
-where 'key-type' is either 'trusted' or 'user'.
+Usage:
+    keyctl add encrypted name "new [format] key-type:master-key-name keylen"
+        ring
+    keyctl add encrypted name "load hex_blob" ring
+    keyctl update keyid "update key-type:master-key-name"
+
+format:= 'default' | 'ecryptfs'
+key-type:= 'trusted' | 'user'
+
 
 Examples of trusted and encrypted key usage:
 
@@ -114,15 +121,25 @@
     7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef
     df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8
 
-Create and save an encrypted key "evm" using the above trusted key "kmk":
+The initial consumer of trusted keys is EVM, which at boot time needs a high
+quality symmetric key for HMAC protection of file metadata.  The use of a
+trusted key provides strong guarantees that the EVM key has not been
+compromised by a user level problem, and when sealed to specific boot PCR
+values, protects against boot and offline attacks.  Create and save an
+encrypted key "evm" using the above trusted key "kmk":
 
+option 1: omitting 'format'
     $ keyctl add encrypted evm "new trusted:kmk 32" @u
     159771175
 
+option 2: explicitly defining 'format' as 'default'
+    $ keyctl add encrypted evm "new default trusted:kmk 32" @u
+    159771175
+
     $ keyctl print 159771175
-    trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
-    be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
-    5972dcb82ab2dde83376d82b2e3c09ffc
+    default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+    82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+    24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
 
     $ keyctl pipe 159771175 > evm.blob
 
@@ -132,14 +149,11 @@
     831684262
 
     $ keyctl print 831684262
-    trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
-    be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
-    5972dcb82ab2dde83376d82b2e3c09ffc
+    default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+    82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+    24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
 
-
-The initial consumer of trusted keys is EVM, which at boot time needs a high
-quality symmetric key for HMAC protection of file metadata.  The use of a
-trusted key provides strong guarantees that the EVM key has not been
-compromised by a user level problem, and when sealed to specific boot PCR
-values, protects against boot and offline attacks.  Other uses for trusted and
-encrypted keys, such as for disk and file encryption are anticipated.
+Other uses for trusted and encrypted keys, such as for disk and file encryption
+are anticipated.  In particular the new format 'ecryptfs' has been defined
+in order to use encrypted keys to mount an eCryptfs filesystem.  More details
+about the usage can be found in the file
+'Documentation/security/keys-ecryptfs.txt'.
diff --git a/Documentation/watchdog/00-INDEX b/Documentation/watchdog/00-INDEX
index ee99451..fc51128 100644
--- a/Documentation/watchdog/00-INDEX
+++ b/Documentation/watchdog/00-INDEX
@@ -8,6 +8,8 @@
 	- directory holding watchdog related example programs.
 watchdog-api.txt
 	- description of the Linux Watchdog driver API.
+watchdog-kernel-api.txt
+	- description of the Linux WatchDog Timer Driver Core kernel API.
 watchdog-parameters.txt
 	- information on driver parameters (for drivers other than
 	  the ones that have driver-specific files here)
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
new file mode 100644
index 0000000..4f7c894
--- /dev/null
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -0,0 +1,162 @@
+The Linux WatchDog Timer Driver Core kernel API.
+================================================
+Last reviewed: 22-Jul-2011
+
+Wim Van Sebroeck <wim@iguana.be>
+
+Introduction
+------------
+This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
+It also does not describe the API which can be used by user space to communicate
+with a WatchDog Timer. If you want to know this then please read the following
+file: Documentation/watchdog/watchdog-api.txt .
+
+So what does this document describe? It describes the API that can be used by
+WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
+Framework. This framework provides all interfacing towards user space so that
+the same code does not have to be reproduced each time. This also means that
+a watchdog timer driver then only needs to provide the different routines
+(operations) that control the watchdog timer (WDT).
+
+The API
+-------
+Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
+must #include <linux/watchdog.h> (you would have to do this anyway when
+writing a watchdog device driver). This include file contains the following
+register/unregister routines:
+
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
+The watchdog_register_device routine registers a watchdog timer device.
+The parameter of this routine is a pointer to a watchdog_device structure.
+This routine returns zero on success and a negative errno code for failure.
+
+The watchdog_unregister_device routine deregisters a registered watchdog timer
+device. The parameter of this routine is the pointer to the registered
+watchdog_device structure.
+
+The watchdog device structure looks like this:
+
+struct watchdog_device {
+	const struct watchdog_info *info;
+	const struct watchdog_ops *ops;
+	unsigned int bootstatus;
+	unsigned int timeout;
+	unsigned int min_timeout;
+	unsigned int max_timeout;
+	void *driver_data;
+	unsigned long status;
+};
+
+It contains the following fields:
+* info: a pointer to a watchdog_info structure. This structure gives some
+  additional information about the watchdog timer itself (like its unique name).
+* ops: a pointer to the list of watchdog operations that the watchdog supports.
+* timeout: the watchdog timer's timeout value (in seconds).
+* min_timeout: the watchdog timer's minimum timeout value (in seconds).
+* max_timeout: the watchdog timer's maximum timeout value (in seconds).
+* bootstatus: status of the device after booting (reported with watchdog
+  WDIOF_* status bits).
+* driver_data: a pointer to the driver's private data for a watchdog device.
+  This data should only be accessed via the watchdog_set_drvdata and
+  watchdog_get_drvdata routines.
+* status: this field contains a number of status bits that give extra
+  information about the status of the device (e.g.: is the watchdog timer
+  running/active, is the nowayout bit set, is the device opened via
+  the /dev/watchdog interface or not, ...).
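+
+A minimal sketch of a statically initialized watchdog_device follows; the
+mywdt_* names, the identity string and the timeout values are illustrative
+only (mywdt_ops is defined in the example at the end of this document):
+
+static const struct watchdog_info mywdt_info = {
+	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+	.identity = "mywdt watchdog",
+};
+
+static struct watchdog_device mywdt_wdd = {
+	.info = &mywdt_info,
+	.ops = &mywdt_ops,	/* defined below */
+	.min_timeout = 1,
+	.max_timeout = 120,
+	.timeout = 30,
+};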
+
+The list of watchdog operations is defined as:
+
+struct watchdog_ops {
+	struct module *owner;
+	/* mandatory operations */
+	int (*start)(struct watchdog_device *);
+	int (*stop)(struct watchdog_device *);
+	/* optional operations */
+	int (*ping)(struct watchdog_device *);
+	unsigned int (*status)(struct watchdog_device *);
+	int (*set_timeout)(struct watchdog_device *, unsigned int);
+	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+It is important that you first define the module owner of the watchdog timer
+driver's operations. This module owner will be used to lock the module when
+the watchdog is active. (This is to avoid a system crash when you unload the
+module while /dev/watchdog is still open).
+Some operations are mandatory and some are optional. The mandatory operations
+are:
+* start: this is a pointer to the routine that starts the watchdog timer
+  device.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+* stop: with this routine the watchdog timer device is stopped.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Some watchdog timer hardware can only be started and not stopped. A
+  driver supporting such hardware must nevertheless provide both a start
+  and a stop routine. This can be done by using a timer in the driver
+  that regularly sends a keepalive ping to the watchdog timer hardware.
+
+Not all watchdog timer hardware supports the same functionality. That's why
+all other routines/operations are optional. They only need to be provided if
+they are supported. These optional routines/operations are:
+* ping: this is the routine that sends a keepalive ping to the watchdog timer
+  hardware.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Most hardware that does not support this as a separate function uses the
+  start function to restart the watchdog timer hardware. And that's also what
+  the watchdog timer driver core does: to send a keepalive ping to the watchdog
+  timer hardware it will either use the ping operation (when available) or the
+  start operation (when the ping operation is not available).
+  (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
+  WDIOF_KEEPALIVEPING bit has been set in the options field of the watchdog's
+  info structure).
+* status: this routine checks the status of the watchdog timer device. The
+  status of the device is reported with watchdog WDIOF_* status flags/bits.
+* set_timeout: this routine checks and changes the timeout of the watchdog
+  timer device. It returns 0 on success, -EINVAL for "parameter out of range"
+  and -EIO for "could not write value to the watchdog". On success the timeout
+  value of the watchdog_device will be changed to the value that was just used
+  to re-program the watchdog timer device.
+  (Note: WDIOF_SETTIMEOUT needs to be set in the options field of the
+  watchdog's info structure).
+* ioctl: if this routine is present then it will be called first before we do
+  our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
+  if a command is not supported. The parameters that are passed to the ioctl
+  call are: watchdog_device, cmd and arg.
+
+The status bits should (preferably) be set and cleared with set_bit, clear_bit
+and similar bit-operations. The status bits that are defined are:
+* WDOG_ACTIVE: this status bit indicates whether a watchdog timer device
+  is active or not. When the watchdog is active after booting, you should
+  set this status bit (Note: when you register the watchdog timer device with
+  this bit set, then opening /dev/watchdog will skip the start operation)
+* WDOG_DEV_OPEN: this status bit shows whether or not the watchdog device
+  was opened via /dev/watchdog.
+  (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_ALLOW_RELEASE: this bit stores whether or not the magic close character
+  has been sent (so that we can support the magic close feature).
+  (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
+  If this bit is set then the watchdog timer will not be able to stop.
+
+Note: The WatchDog Timer Driver Core supports the magic close feature and
+the nowayout feature. To use the magic close feature you must set the
+WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
+The nowayout feature will overrule the magic close feature.
+
+To get or set driver specific data the following two helper functions should be
+used:
+
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+
+The watchdog_set_drvdata function allows you to add driver specific data. The
+arguments of this function are the watchdog device to which you want to add
+the driver specific data, and a pointer to the data itself.
+
+The watchdog_get_drvdata function allows you to retrieve driver specific data.
+The argument of this function is the watchdog device from which you want to
+retrieve data. The function returns the pointer to the driver specific data.
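+
+Example
+-------
+A minimal, hypothetical driver sketch that ties the above together. The
+mywdt_* names and the register writes are illustrative only (a real driver
+talks to its own hardware); mywdt_info and mywdt_wdd are the structures
+sketched earlier in this document:
+
+static int mywdt_start(struct watchdog_device *wdd)
+{
+	void __iomem *base = watchdog_get_drvdata(wdd);
+
+	/* program the configured timeout and enable the hardware timer */
+	writel(wdd->timeout, base + MYWDT_LOAD);	/* hypothetical registers */
+	writel(MYWDT_ENABLE, base + MYWDT_CTRL);
+	return 0;
+}
+
+static int mywdt_stop(struct watchdog_device *wdd)
+{
+	void __iomem *base = watchdog_get_drvdata(wdd);
+
+	/* disable the hardware timer */
+	writel(0, base + MYWDT_CTRL);
+	return 0;
+}
+
+static const struct watchdog_ops mywdt_ops = {
+	.owner = THIS_MODULE,
+	/* mandatory operations */
+	.start = mywdt_start,
+	.stop = mywdt_stop,
+};
+
+/* store the driver data first, then register the device */
+static int mywdt_setup(void __iomem *base)
+{
+	watchdog_set_drvdata(&mywdt_wdd, base);
+	return watchdog_register_device(&mywdt_wdd);
+}
+
+/* deregister the watchdog timer device again on removal */
+static void mywdt_teardown(void)
+{
+	watchdog_unregister_device(&mywdt_wdd);
+}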
diff --git a/MAINTAINERS b/MAINTAINERS
index 7b2e9e8..1d2e79d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6408,7 +6408,7 @@
 L:	tomoyo-dev@lists.sourceforge.jp (subscribers-only, for developers in Japanese)
 L:	tomoyo-users@lists.sourceforge.jp (subscribers-only, for users in Japanese)
 W:	http://tomoyo.sourceforge.jp/
-T:	quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.3.x/tomoyo-lsm/patches/
+T:	quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.4.x/tomoyo-lsm/patches/
 S:	Maintained
 F:	security/tomoyo/
 
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 4b8abf9..d82ebab 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -27,14 +27,14 @@
 
 config MACH_HARMONY
        bool "Harmony board"
-       select MACH_HAS_SND_SOC_TEGRA_WM8903
+       select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
        help
          Support for nVidia Harmony development platform
 
 config MACH_KAEN
        bool "Kaen board"
        select MACH_SEABOARD
-       select MACH_HAS_SND_SOC_TEGRA_WM8903
+       select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
        help
          Support for the Kaen version of Seaboard
 
@@ -45,7 +45,7 @@
 
 config MACH_SEABOARD
        bool "Seaboard board"
-       select MACH_HAS_SND_SOC_TEGRA_WM8903
+       select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
        help
          Support for nVidia Seaboard development platform. It will
 	 also be included for some of the derivative boards that
diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile
index c550c67..397268c 100644
--- a/arch/arm/mach-zynq/Makefile
+++ b/arch/arm/mach-zynq/Makefile
@@ -3,4 +3,4 @@
 #
 
 # Common support
-obj-y 				:= common.o timer.o board_dt.o
+obj-y				:= common.o timer.o
diff --git a/arch/arm/mach-zynq/board_dt.c b/arch/arm/mach-zynq/board_dt.c
deleted file mode 100644
index e69de29..0000000
--- a/arch/arm/mach-zynq/board_dt.c
+++ /dev/null
diff --git a/arch/microblaze/include/asm/cpuinfo.h b/arch/microblaze/include/asm/cpuinfo.h
index d8f0133..7d6831a 100644
--- a/arch/microblaze/include/asm/cpuinfo.h
+++ b/arch/microblaze/include/asm/cpuinfo.h
@@ -38,6 +38,7 @@
 	u32 use_exc;
 	u32 ver_code;
 	u32 mmu;
+	u32 mmu_privins;
 	u32 endian;
 
 	/* CPU caches */
diff --git a/arch/microblaze/include/asm/irqflags.h b/arch/microblaze/include/asm/irqflags.h
index c4532f0..c9a6262 100644
--- a/arch/microblaze/include/asm/irqflags.h
+++ b/arch/microblaze/include/asm/irqflags.h
@@ -14,7 +14,7 @@
 
 #if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
 
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
 	asm volatile("	msrclr %0, %1	\n"
@@ -25,7 +25,7 @@
 	return flags;
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
 	/* this uses r0 without declaring it - is that correct? */
 	asm volatile("	msrclr r0, %0	\n"
@@ -35,7 +35,7 @@
 		     : "memory");
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
 	/* this uses r0 without declaring it - is that correct? */
 	asm volatile("	msrset	r0, %0	\n"
@@ -47,7 +47,7 @@
 
 #else /* !CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */
 
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags, tmp;
 	asm volatile ("	mfs	%0, rmsr	\n"
@@ -61,7 +61,7 @@
 	return flags;
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
 	unsigned long tmp;
 	asm volatile("	mfs	%0, rmsr	\n"
@@ -74,7 +74,7 @@
 		     : "memory");
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
 	unsigned long tmp;
 	asm volatile("	mfs	%0, rmsr	\n"
@@ -89,7 +89,7 @@
 
 #endif /* CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
 	unsigned long flags;
 	asm volatile("	mfs	%0, rmsr	\n"
@@ -100,7 +100,7 @@
 	return flags;
 }
 
-static inline void arch_local_irq_restore(unsigned long flags)
+static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
 	asm volatile("	mts	rmsr, %0	\n"
 		     "	nop			\n"
@@ -109,12 +109,12 @@
 		     : "memory");
 }
 
-static inline bool arch_irqs_disabled_flags(unsigned long flags)
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
 	return (flags & MSR_IE) == 0;
 }
 
-static inline bool arch_irqs_disabled(void)
+static inline notrace bool arch_irqs_disabled(void)
 {
 	return arch_irqs_disabled_flags(arch_local_save_flags());
 }
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index aed2a6b..7283bfb 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -125,9 +125,6 @@
 	.pgdir = swapper_pg_dir, \
 }
 
-/* Do necessary setup to start up a newly executed thread.  */
-void start_thread(struct pt_regs *regs,
-		unsigned long pc, unsigned long usp);
 
 /* Free all resources held by a thread. */
 extern inline void release_thread(struct task_struct *dead_task)
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 9ad567e..20c5e8e 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -26,8 +26,12 @@
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
 /* Other Prototypes */
-extern int early_uartlite_console(void);
-extern int early_uart16550_console(void);
+enum early_consoles {
+	UARTLITE = 1,
+	UART16550 = 2,
+};
+
+extern int of_early_console(void *version);
 
 /*
  * OF address retreival & translation
diff --git a/arch/microblaze/include/asm/pvr.h b/arch/microblaze/include/asm/pvr.h
index a10bec6..4bbdb4c 100644
--- a/arch/microblaze/include/asm/pvr.h
+++ b/arch/microblaze/include/asm/pvr.h
@@ -111,16 +111,16 @@
 /* Target family PVR mask */
 #define PVR10_TARGET_FAMILY_MASK	0xFF000000
 
-/* MMU descrtiption */
+/* MMU description */
 #define PVR11_USE_MMU			0xC0000000
 #define PVR11_MMU_ITLB_SIZE		0x38000000
 #define PVR11_MMU_DTLB_SIZE		0x07000000
 #define PVR11_MMU_TLB_ACCESS		0x00C00000
 #define PVR11_MMU_ZONES			0x003C0000
+#define PVR11_MMU_PRIVINS		0x00010000
 /* MSR Reset value PVR mask */
 #define PVR11_MSR_RESET_VALUE_MASK	0x000007FF
 
-
 /* PVR access macros */
 #define PVR_IS_FULL(_pvr)	(_pvr.pvr[0] & PVR0_PVR_FULL_MASK)
 #define PVR_USE_BARREL(_pvr)	(_pvr.pvr[0] & PVR0_USE_BARREL_MASK)
@@ -216,6 +216,7 @@
 #define PVR_MMU_DTLB_SIZE(_pvr)		(_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
 #define PVR_MMU_TLB_ACCESS(_pvr)	(_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
 #define PVR_MMU_ZONES(_pvr)		(_pvr.pvr[11] & PVR11_MMU_ZONES)
+#define PVR_MMU_PRIVINS(pvr)		(pvr.pvr[11] & PVR11_MMU_PRIVINS)
 
 /* endian */
 #define PVR_ENDIAN(_pvr)	(_pvr.pvr[0] & PVR0_ENDI)
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 8f39689..904e5ef 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -23,6 +23,7 @@
 void early_printk(const char *fmt, ...);
 
 int setup_early_printk(char *opt);
+void remap_early_printk(void);
 void disable_early_printk(void);
 
 #if defined(CONFIG_EARLY_PRINTK)
diff --git a/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c b/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
index f70a604..916aaed 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
@@ -72,6 +72,7 @@
 	CI(pvr_user2, USER2);
 
 	CI(mmu, USE_MMU);
+	CI(mmu_privins, MMU_PRIVINS);
 	CI(endian, ENDIAN);
 
 	CI(use_icache, USE_ICACHE);
diff --git a/arch/microblaze/kernel/cpu/cpuinfo-static.c b/arch/microblaze/kernel/cpu/cpuinfo-static.c
index b16b994..592bb2e 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo-static.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo-static.c
@@ -119,6 +119,7 @@
 	ci->pvr_user2 = fcpu(cpu, "xlnx,pvr-user2");
 
 	ci->mmu = fcpu(cpu, "xlnx,use-mmu");
+	ci->mmu_privins = fcpu(cpu, "xlnx,mmu-privileged-instr");
 	ci->endian = fcpu(cpu, "xlnx,endianness");
 
 	ci->ver_code = 0;
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index c1640c5..44394d8 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -88,4 +88,8 @@
 		printk(KERN_WARNING "%s: Unsupported PVR setting\n", __func__);
 		set_cpuinfo_static(&cpuinfo, cpu);
 	}
+
+	if (cpuinfo.mmu_privins)
+		printk(KERN_WARNING "%s: Stream instructions enabled"
+			" - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
 }
diff --git a/arch/microblaze/kernel/cpu/mb.c b/arch/microblaze/kernel/cpu/mb.c
index b4048af..7b5dca7 100644
--- a/arch/microblaze/kernel/cpu/mb.c
+++ b/arch/microblaze/kernel/cpu/mb.c
@@ -97,6 +97,10 @@
 		(cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
 		(cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
 
+	count += seq_printf(m,
+			"Stream-insns:\t%sprivileged\n",
+			cpuinfo.mmu_privins ? "un" : "");
+
 	if (cpuinfo.use_icache)
 		count += seq_printf(m,
 				"Icache:\t\t%ukB\tline length:\t%dB\n",
@@ -110,10 +114,11 @@
 				"Dcache:\t\t%ukB\tline length:\t%dB\n",
 				cpuinfo.dcache_size >> 10,
 				cpuinfo.dcache_line_length);
+		seq_printf(m, "Dcache-Policy:\t");
 		if (cpuinfo.dcache_wb)
-			count += seq_printf(m, "\t\twrite-back\n");
+			count += seq_printf(m, "write-back\n");
 		else
-			count += seq_printf(m, "\t\twrite-through\n");
+			count += seq_printf(m, "write-through\n");
 	} else
 		count += seq_printf(m, "Dcache:\t\tno\n");
 
diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c
index c3616a0..d26d92d 100644
--- a/arch/microblaze/kernel/early_printk.c
+++ b/arch/microblaze/kernel/early_printk.c
@@ -35,7 +35,7 @@
 	 * we'll never timeout on a working UART.
 	 */
 
-	unsigned retries = 10000;
+	unsigned retries = 1000000;
 	/* read status bit - 0x8 offset */
 	while (--retries && (in_be32(base_addr + 8) & (1 << 3)))
 		;
@@ -60,7 +60,7 @@
 static struct console early_serial_uartlite_console = {
 	.name = "earlyser",
 	.write = early_printk_uartlite_write,
-	.flags = CON_PRINTBUFFER,
+	.flags = CON_PRINTBUFFER | CON_BOOT,
 	.index = -1,
 };
 #endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */
@@ -104,7 +104,7 @@
 static struct console early_serial_uart16550_console = {
 	.name = "earlyser",
 	.write = early_printk_uart16550_write,
-	.flags = CON_PRINTBUFFER,
+	.flags = CON_PRINTBUFFER | CON_BOOT,
 	.index = -1,
 };
 #endif /* CONFIG_SERIAL_8250_CONSOLE */
@@ -127,48 +127,56 @@
 
 int __init setup_early_printk(char *opt)
 {
+	int version = 0;
+
 	if (early_console_initialized)
 		return 1;
 
+	base_addr = of_early_console(&version);
+	if (base_addr) {
+#ifdef CONFIG_MMU
+		early_console_reg_tlb_alloc(base_addr);
+#endif
+		switch (version) {
 #ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
-	base_addr = early_uartlite_console();
-	if (base_addr) {
-		early_console_initialized = 1;
-#ifdef CONFIG_MMU
-		early_console_reg_tlb_alloc(base_addr);
+		case UARTLITE:
+			printk(KERN_INFO "Early console on uartlite "
+						"at 0x%08x\n", base_addr);
+			early_console = &early_serial_uartlite_console;
+			break;
 #endif
-		early_console = &early_serial_uartlite_console;
-		early_printk("early_printk_console is enabled at 0x%08x\n",
-							base_addr);
-
-		/* register_console(early_console); */
-
-		return 0;
-	}
-#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */
-
 #ifdef CONFIG_SERIAL_8250_CONSOLE
-	base_addr = early_uart16550_console();
-	base_addr &= ~3; /* clear register offset */
-	if (base_addr) {
-		early_console_initialized = 1;
-#ifdef CONFIG_MMU
-		early_console_reg_tlb_alloc(base_addr);
+		case UART16550:
+			printk(KERN_INFO "Early console on uart16650 "
+						"at 0x%08x\n", base_addr);
+			early_console = &early_serial_uart16550_console;
+			break;
 #endif
-		early_console = &early_serial_uart16550_console;
+		default:
+			printk(KERN_INFO  "Unsupported early console %d\n",
+								version);
+			return 1;
+		}
 
-		early_printk("early_printk_console is enabled at 0x%08x\n",
-							base_addr);
-
-		/* register_console(early_console); */
-
+		register_console(early_console);
+		early_console_initialized = 1;
 		return 0;
 	}
-#endif /* CONFIG_SERIAL_8250_CONSOLE */
-
 	return 1;
 }
 
+/* Remap the early console to a virtual address instead of permanently
+ * allocating one TLB entry just for it, which would degrade performance */
+void __init remap_early_printk(void)
+{
+	if (!early_console_initialized || !early_console)
+		return;
+	printk(KERN_INFO "early_printk_console remaping from 0x%x to ",
+								base_addr);
+	base_addr = (u32) ioremap(base_addr, PAGE_SIZE);
+	printk(KERN_CONT "0x%x\n", base_addr);
+}
+
 void __init disable_early_printk(void)
 {
 	if (!early_console_initialized || !early_console)
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index 56572e9..e62be83 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -1113,23 +1113,23 @@
 lw_r11_vm:	R3_TO_LWREG_VM_V	(11);
 lw_r12_vm:	R3_TO_LWREG_VM_V	(12);
 lw_r13_vm:	R3_TO_LWREG_VM_V	(13);
-lw_r14_vm:	R3_TO_LWREG_VM		(14);
+lw_r14_vm:	R3_TO_LWREG_VM_V	(14);
 lw_r15_vm:	R3_TO_LWREG_VM_V	(15);
-lw_r16_vm:	R3_TO_LWREG_VM		(16);
+lw_r16_vm:	R3_TO_LWREG_VM_V	(16);
 lw_r17_vm:	R3_TO_LWREG_VM_V	(17);
 lw_r18_vm:	R3_TO_LWREG_VM_V	(18);
-lw_r19_vm:	R3_TO_LWREG_VM		(19);
-lw_r20_vm:	R3_TO_LWREG_VM		(20);
-lw_r21_vm:	R3_TO_LWREG_VM		(21);
-lw_r22_vm:	R3_TO_LWREG_VM		(22);
-lw_r23_vm:	R3_TO_LWREG_VM		(23);
-lw_r24_vm:	R3_TO_LWREG_VM		(24);
-lw_r25_vm:	R3_TO_LWREG_VM		(25);
-lw_r26_vm:	R3_TO_LWREG_VM		(26);
-lw_r27_vm:	R3_TO_LWREG_VM		(27);
-lw_r28_vm:	R3_TO_LWREG_VM		(28);
-lw_r29_vm:	R3_TO_LWREG_VM		(29);
-lw_r30_vm:	R3_TO_LWREG_VM		(30);
+lw_r19_vm:	R3_TO_LWREG_VM_V	(19);
+lw_r20_vm:	R3_TO_LWREG_VM_V	(20);
+lw_r21_vm:	R3_TO_LWREG_VM_V	(21);
+lw_r22_vm:	R3_TO_LWREG_VM_V	(22);
+lw_r23_vm:	R3_TO_LWREG_VM_V	(23);
+lw_r24_vm:	R3_TO_LWREG_VM_V	(24);
+lw_r25_vm:	R3_TO_LWREG_VM_V	(25);
+lw_r26_vm:	R3_TO_LWREG_VM_V	(26);
+lw_r27_vm:	R3_TO_LWREG_VM_V	(27);
+lw_r28_vm:	R3_TO_LWREG_VM_V	(28);
+lw_r29_vm:	R3_TO_LWREG_VM_V	(29);
+lw_r30_vm:	R3_TO_LWREG_VM_V	(30);
 lw_r31_vm:	R3_TO_LWREG_VM_V	(31);
 
 sw_table_vm:
@@ -1147,23 +1147,23 @@
 sw_r11_vm:	SWREG_TO_R3_VM_V	(11);
 sw_r12_vm:	SWREG_TO_R3_VM_V	(12);
 sw_r13_vm:	SWREG_TO_R3_VM_V	(13);
-sw_r14_vm:	SWREG_TO_R3_VM		(14);
+sw_r14_vm:	SWREG_TO_R3_VM_V	(14);
 sw_r15_vm:	SWREG_TO_R3_VM_V	(15);
-sw_r16_vm:	SWREG_TO_R3_VM		(16);
+sw_r16_vm:	SWREG_TO_R3_VM_V	(16);
 sw_r17_vm:	SWREG_TO_R3_VM_V	(17);
 sw_r18_vm:	SWREG_TO_R3_VM_V	(18);
-sw_r19_vm:	SWREG_TO_R3_VM		(19);
-sw_r20_vm:	SWREG_TO_R3_VM		(20);
-sw_r21_vm:	SWREG_TO_R3_VM		(21);
-sw_r22_vm:	SWREG_TO_R3_VM		(22);
-sw_r23_vm:	SWREG_TO_R3_VM		(23);
-sw_r24_vm:	SWREG_TO_R3_VM		(24);
-sw_r25_vm:	SWREG_TO_R3_VM		(25);
-sw_r26_vm:	SWREG_TO_R3_VM		(26);
-sw_r27_vm:	SWREG_TO_R3_VM		(27);
-sw_r28_vm:	SWREG_TO_R3_VM		(28);
-sw_r29_vm:	SWREG_TO_R3_VM		(29);
-sw_r30_vm:	SWREG_TO_R3_VM		(30);
+sw_r19_vm:	SWREG_TO_R3_VM_V	(19);
+sw_r20_vm:	SWREG_TO_R3_VM_V	(20);
+sw_r21_vm:	SWREG_TO_R3_VM_V	(21);
+sw_r22_vm:	SWREG_TO_R3_VM_V	(22);
+sw_r23_vm:	SWREG_TO_R3_VM_V	(23);
+sw_r24_vm:	SWREG_TO_R3_VM_V	(24);
+sw_r25_vm:	SWREG_TO_R3_VM_V	(25);
+sw_r26_vm:	SWREG_TO_R3_VM_V	(26);
+sw_r27_vm:	SWREG_TO_R3_VM_V	(27);
+sw_r28_vm:	SWREG_TO_R3_VM_V	(28);
+sw_r29_vm:	SWREG_TO_R3_VM_V	(29);
+sw_r30_vm:	SWREG_TO_R3_VM_V	(30);
 sw_r31_vm:	SWREG_TO_R3_VM_V	(31);
 #endif /* CONFIG_MMU */
 
diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c
index c88f066..eb41441 100644
--- a/arch/microblaze/kernel/intc.c
+++ b/arch/microblaze/kernel/intc.c
@@ -134,7 +134,7 @@
 	intr_type =
 		be32_to_cpup(of_get_property(intc,
 						"xlnx,kind-of-intr", NULL));
-	if (intr_type >= (1 << (nr_irq + 1)))
+	if (intr_type > (u32)((1ULL << nr_irq) - 1))
 		printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n");
 
 #ifdef CONFIG_SELFMOD_INTC
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 968648a..dbb8124 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -237,7 +237,6 @@
 /* Set up a thread for executing a new program */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
 {
-	set_fs(USER_DS);
 	regs->pc = pc;
 	regs->r1 = usp;
 	regs->pt_mode = 0;
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index b15cc21..977484a 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -53,69 +53,58 @@
 }
 
 #ifdef CONFIG_EARLY_PRINTK
-/* MS this is Microblaze specifig function */
-static int __init early_init_dt_scan_serial(unsigned long node,
+char *stdout;
+
+int __init early_init_dt_scan_chosen_serial(unsigned long node,
 				const char *uname, int depth, void *data)
 {
 	unsigned long l;
 	char *p;
-	const __be32 *addr;
 
-	pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname);
+	pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname);
 
-/* find all serial nodes */
-	if (strncmp(uname, "serial", 6) != 0)
-		return 0;
+	if (depth == 1 && (strcmp(uname, "chosen") == 0 ||
+				strcmp(uname, "chosen@0") == 0)) {
+		p = of_get_flat_dt_prop(node, "linux,stdout-path", &l);
+		if (p != NULL && l > 0)
+			stdout = p; /* store pointer to stdout-path */
+	}
 
-/* find compatible node with uartlite */
-	p = of_get_flat_dt_prop(node, "compatible", &l);
-	if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) &&
-			(strncmp(p, "xlnx,opb-uartlite", 17) != 0) &&
-			(strncmp(p, "xlnx,axi-uartlite", 17) != 0))
-		return 0;
+	if (stdout && strstr(stdout, uname)) {
+		p = of_get_flat_dt_prop(node, "compatible", &l);
+		pr_debug("Compatible string: %s\n", p);
 
-	addr = of_get_flat_dt_prop(node, "reg", &l);
-	return be32_to_cpup(addr); /* return address */
+		if ((strncmp(p, "xlnx,xps-uart16550", 18) == 0) ||
+			(strncmp(p, "xlnx,axi-uart16550", 18) == 0)) {
+			unsigned int addr;
+
+			*(u32 *)data = UART16550;
+
+			addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l);
+			addr += *(u32 *)of_get_flat_dt_prop(node,
+							"reg-offset", &l);
+			/* clear register offset */
+			return be32_to_cpu(addr) & ~3;
+		}
+		if ((strncmp(p, "xlnx,xps-uartlite", 17) == 0) ||
+				(strncmp(p, "xlnx,opb-uartlite", 17) == 0) ||
+				(strncmp(p, "xlnx,axi-uartlite", 17) == 0) ||
+				(strncmp(p, "xlnx,mdm", 8) == 0)) {
+			unsigned int *addrp;
+
+			*(u32 *)data = UARTLITE;
+
+			addrp = of_get_flat_dt_prop(node, "reg", &l);
+			return be32_to_cpup(addrp); /* return address */
+		}
+	}
+	return 0;
 }
 
-/* this function is looking for early uartlite console - Microblaze specific */
-int __init early_uartlite_console(void)
+/* This function looks for the early console - Microblaze specific */
+int __init of_early_console(void *version)
 {
-	return of_scan_flat_dt(early_init_dt_scan_serial, NULL);
-}
-
-/* MS this is Microblaze specifig function */
-static int __init early_init_dt_scan_serial_full(unsigned long node,
-				const char *uname, int depth, void *data)
-{
-	unsigned long l;
-	char *p;
-	unsigned int addr;
-
-	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
-
-/* find all serial nodes */
-	if (strncmp(uname, "serial", 6) != 0)
-		return 0;
-
-	early_init_dt_check_for_initrd(node);
-
-/* find compatible node with uartlite */
-	p = of_get_flat_dt_prop(node, "compatible", &l);
-
-	if ((strncmp(p, "xlnx,xps-uart16550", 18) != 0) &&
-		(strncmp(p, "xlnx,axi-uart16550", 18) != 0))
-		return 0;
-
-	addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l);
-	addr += *(u32 *)of_get_flat_dt_prop(node, "reg-offset", &l);
-	return be32_to_cpu(addr); /* return address */
-}
-
-/* this function is looking for early uartlite console - Microblaze specific */
-int __init early_uart16550_console(void)
-{
-	return of_scan_flat_dt(early_init_dt_scan_serial_full, NULL);
+	return of_scan_flat_dt(early_init_dt_scan_chosen_serial, version);
 }
 #endif
 
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 8e2c09b..0e654a1 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -59,6 +59,11 @@
 
 	setup_memory();
 
+#ifdef CONFIG_EARLY_PRINTK
+	/* remap early console to virtual address */
+	remap_early_printk();
+#endif
+
 	xilinx_pci_init();
 
 #if defined(CONFIG_SELFMOD_INTC) || defined(CONFIG_SELFMOD_TIMER)
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index cfa9cd2..64f7a00 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -177,9 +177,11 @@
 		cap |= HWCAP_SPARC_ULTRA3;
 	else if (tlb_type == hypervisor) {
 		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
-		    sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
 			cap |= HWCAP_SPARC_BLKINIT;
-		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
+		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
 			cap |= HWCAP_SPARC_N2;
 	}
 
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 7568640..7a5f80d 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2950,6 +2950,7 @@
 #define HV_GRP_N2_CPU			0x0202
 #define HV_GRP_NIU			0x0204
 #define HV_GRP_VF_CPU			0x0205
+#define HV_GRP_KT_CPU			0x0209
 #define HV_GRP_DIAG			0x0300
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index f0d0c40c4..55a17c6 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -42,6 +42,7 @@
 #define SUN4V_CHIP_INVALID	0x00
 #define SUN4V_CHIP_NIAGARA1	0x01
 #define SUN4V_CHIP_NIAGARA2	0x02
+#define SUN4V_CHIP_NIAGARA3	0x03
 #define SUN4V_CHIP_UNKNOWN	0xff
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
index bee4bf4..9ed6ff6 100644
--- a/arch/sparc/include/asm/xor_64.h
+++ b/arch/sparc/include/asm/xor_64.h
@@ -65,6 +65,7 @@
 #define XOR_SELECT_TEMPLATE(FASTEST) \
 	((tlb_type == hypervisor && \
 	  (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
-	   sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \
+	   sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \
+	   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? \
 	 &xor_block_niagara : \
 	 &xor_block_VIS)
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 138dbbc..17cf290 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -474,11 +474,18 @@
 		sparc_pmu_type = "niagara2";
 		break;
 
+	case SUN4V_CHIP_NIAGARA3:
+		sparc_cpu_type = "UltraSparc T3 (Niagara3)";
+		sparc_fpu_type = "UltraSparc T3 integrated FPU";
+		sparc_pmu_type = "niagara3";
+		break;
+
 	default:
 		printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
 		       prom_cpu_compatible);
 		sparc_cpu_type = "Unknown SUN4V CPU";
 		sparc_fpu_type = "Unknown SUN4V FPU";
+		sparc_pmu_type = "Unknown SUN4V PMU";
 		break;
 	}
 }
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index d91fd78..4197e8d 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -324,6 +324,7 @@
 	switch (sun4v_chip_type) {
 	case SUN4V_CHIP_NIAGARA1:
 	case SUN4V_CHIP_NIAGARA2:
+	case SUN4V_CHIP_NIAGARA3:
 		rover_inc_table = niagara_iterate_method;
 		break;
 	default:
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index aa594c7..c752603 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -132,6 +132,8 @@
 	.asciz	"sun4v"
 prom_niagara_prefix:
 	.asciz	"SUNW,UltraSPARC-T"
+prom_sparc_prefix:
+	.asciz	"SPARC-T"
 	.align	4
 prom_root_compatible:
 	.skip	64
@@ -382,6 +384,22 @@
 90:	ldub	[%g7], %g2
 	ldub	[%g1], %g4
 	cmp	%g2, %g4
+	bne,pn	%icc, 89f
+	 add	%g7, 1, %g7
+	subcc	%g3, 1, %g3
+	bne,pt	%xcc, 90b
+	 add	%g1, 1, %g1
+	ba,pt	%xcc, 91f
+	 nop
+
+89:	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	sethi	%hi(prom_sparc_prefix), %g7
+	or	%g7, %lo(prom_sparc_prefix), %g7
+	mov	7, %g3
+90:	ldub	[%g7], %g2
+	ldub	[%g1], %g4
+	cmp	%g2, %g4
 	bne,pn	%icc, 4f
 	 add	%g7, 1, %g7
 	subcc	%g3, 1, %g3
@@ -390,6 +408,15 @@
 
 	sethi	%hi(prom_cpu_compatible), %g1
 	or	%g1, %lo(prom_cpu_compatible), %g1
+	ldub	[%g1 + 7], %g2
+	cmp	%g2, '3'
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA3, %g4
+	ba,pt	%xcc, 4f
+	 nop
+
+91:	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
 	ldub	[%g1 + 17], %g2
 	cmp	%g2, '1'
 	be,pt	%xcc, 5f
@@ -397,6 +424,7 @@
 	cmp	%g2, '2'
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA2, %g4
+	
 4:
 	mov	SUN4V_CHIP_UNKNOWN, %g4
 5:	sethi	%hi(sun4v_chip_type), %g2
@@ -514,6 +542,9 @@
 	 cmp	%g1, SUN4V_CHIP_NIAGARA2
 	be,pt	%xcc, niagara2_patch
 	 nop
+	cmp	%g1, SUN4V_CHIP_NIAGARA3
+	be,pt	%xcc, niagara2_patch
+	 nop
 
 	call	generic_patch_copyops
 	 nop
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 7c60afb..d306e64 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -38,6 +38,7 @@
 	{ .group = HV_GRP_N2_CPU,				},
 	{ .group = HV_GRP_NIU,					},
 	{ .group = HV_GRP_VF_CPU,				},
+	{ .group = HV_GRP_KT_CPU,				},
 	{ .group = HV_GRP_DIAG,		.flags = FLAG_PRE_API	},
 };
 
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 8ac23e6..343b0f9 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -80,8 +80,11 @@
 {
 	unsigned long ret;
 
-	ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
-	if (ret != HV_EOK)
+	if (val & PCR_N2_HTRACE) {
+		ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
+		if (ret != HV_EOK)
+			write_pcr(val);
+	} else
 		write_pcr(val);
 }
 
@@ -106,6 +109,10 @@
 			perf_hsvc_group = HV_GRP_N2_CPU;
 			break;
 
+		case SUN4V_CHIP_NIAGARA3:
+			perf_hsvc_group = HV_GRP_KT_CPU;
+			break;
+
 		default:
 			return -ENODEV;
 		}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 171e8d8..614da62 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1343,7 +1343,8 @@
 		sparc_pmu = &niagara1_pmu;
 		return true;
 	}
-	if (!strcmp(sparc_pmu_type, "niagara2")) {
+	if (!strcmp(sparc_pmu_type, "niagara2") ||
+	    !strcmp(sparc_pmu_type, "niagara3")) {
 		sparc_pmu = &niagara2_pmu;
 		return true;
 	}
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 1a371f8..8600eb2 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -55,7 +55,7 @@
 }
 EXPORT_SYMBOL(atomic_cmpxchg);
 
-int atomic_add_unless(atomic_t *v, int a, int u)
+int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	unsigned long flags;
@@ -67,7 +67,7 @@
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret != u;
 }
-EXPORT_SYMBOL(atomic_add_unless);
+EXPORT_SYMBOL(__atomic_add_unless);
 
 /* Atomic operations are already serializing */
 void atomic_set(atomic_t *v, int i)
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index ac6739e..c3de70d 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -1,6 +1,6 @@
 /* n2-drv.c: Niagara-2 RNG driver.
  *
- * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2008, 2011 David S. Miller <davem@davemloft.net>
  */
 
 #include <linux/kernel.h>
@@ -22,8 +22,8 @@
 
 #define DRV_MODULE_NAME		"n2rng"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"0.1"
-#define DRV_MODULE_RELDATE	"May 15, 2008"
+#define DRV_MODULE_VERSION	"0.2"
+#define DRV_MODULE_RELDATE	"July 27, 2011"
 
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -623,14 +623,14 @@
 static int __devinit n2rng_probe(struct platform_device *op)
 {
 	const struct of_device_id *match;
-	int victoria_falls;
+	int multi_capable;
 	int err = -ENOMEM;
 	struct n2rng *np;
 
 	match = of_match_device(n2rng_match, &op->dev);
 	if (!match)
 		return -EINVAL;
-	victoria_falls = (match->data != NULL);
+	multi_capable = (match->data != NULL);
 
 	n2rng_driver_version();
 	np = kzalloc(sizeof(*np), GFP_KERNEL);
@@ -640,8 +640,8 @@
 
 	INIT_DELAYED_WORK(&np->work, n2rng_work);
 
-	if (victoria_falls)
-		np->flags |= N2RNG_FLAG_VF;
+	if (multi_capable)
+		np->flags |= N2RNG_FLAG_MULTI;
 
 	err = -ENODEV;
 	np->hvapi_major = 2;
@@ -658,10 +658,10 @@
 		}
 	}
 
-	if (np->flags & N2RNG_FLAG_VF) {
+	if (np->flags & N2RNG_FLAG_MULTI) {
 		if (np->hvapi_major < 2) {
-			dev_err(&op->dev, "VF RNG requires HVAPI major "
-				"version 2 or later, got %lu\n",
+			dev_err(&op->dev, "multi-unit-capable RNG requires "
+				"HVAPI major version 2 or later, got %lu\n",
 				np->hvapi_major);
 			goto out_hvapi_unregister;
 		}
@@ -688,8 +688,8 @@
 		goto out_free_units;
 
 	dev_info(&op->dev, "Found %s RNG, units: %d\n",
-		 ((np->flags & N2RNG_FLAG_VF) ?
-		  "Victoria Falls" : "Niagara2"),
+		 ((np->flags & N2RNG_FLAG_MULTI) ?
+		  "multi-unit-capable" : "single-unit"),
 		 np->num_units);
 
 	np->hwrng.name = "n2rng";
@@ -751,6 +751,11 @@
 		.compatible	= "SUNW,vf-rng",
 		.data		= (void *) 1,
 	},
+	{
+		.name		= "random-number-generator",
+		.compatible	= "SUNW,kt-rng",
+		.data		= (void *) 1,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, n2rng_match);
diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h
index 4bea07f..f244ac8 100644
--- a/drivers/char/hw_random/n2rng.h
+++ b/drivers/char/hw_random/n2rng.h
@@ -68,7 +68,7 @@
 	struct platform_device	*op;
 
 	unsigned long		flags;
-#define N2RNG_FLAG_VF		0x00000001 /* Victoria Falls RNG, else N2 */
+#define N2RNG_FLAG_MULTI	0x00000001 /* Multi-unit capable RNG */
 #define N2RNG_FLAG_CONTROL	0x00000002 /* Operating in control domain */
 #define N2RNG_FLAG_READY	0x00000008 /* Ready for hw-rng layer      */
 #define N2RNG_FLAG_SHUTDOWN	0x00000010 /* Driver unregistering        */
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 7beb0e2..caf8012 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -534,6 +534,7 @@
 	struct duration_t *duration_cap;
 	ssize_t rc;
 	u32 timeout;
+	unsigned int scale = 1;
 
 	tpm_cmd.header.in = tpm_getcap_header;
 	tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
@@ -545,24 +546,30 @@
 	if (rc)
 		goto duration;
 
-	if (be32_to_cpu(tpm_cmd.header.out.length)
-	    != 4 * sizeof(u32))
-		goto duration;
+	if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+	    be32_to_cpu(tpm_cmd.header.out.length)
+	    != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
+		return;
 
 	timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
 	/* Don't overwrite default if value is 0 */
 	timeout = be32_to_cpu(timeout_cap->a);
+	if (timeout && timeout < 1000) {
+		/* timeouts in msec rather than usec */
+		scale = 1000;
+		chip->vendor.timeout_adjusted = true;
+	}
 	if (timeout)
-		chip->vendor.timeout_a = usecs_to_jiffies(timeout);
+		chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
 	timeout = be32_to_cpu(timeout_cap->b);
 	if (timeout)
-		chip->vendor.timeout_b = usecs_to_jiffies(timeout);
+		chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
 	timeout = be32_to_cpu(timeout_cap->c);
 	if (timeout)
-		chip->vendor.timeout_c = usecs_to_jiffies(timeout);
+		chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
 	timeout = be32_to_cpu(timeout_cap->d);
 	if (timeout)
-		chip->vendor.timeout_d = usecs_to_jiffies(timeout);
+		chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
 
 duration:
 	tpm_cmd.header.in = tpm_getcap_header;
@@ -575,23 +582,31 @@
 	if (rc)
 		return;
 
-	if (be32_to_cpu(tpm_cmd.header.out.return_code)
-	    != 3 * sizeof(u32))
+	if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+	    be32_to_cpu(tpm_cmd.header.out.length)
+	    != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
 		return;
+
 	duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
 	chip->vendor.duration[TPM_SHORT] =
 	    usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
-	/* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
-	 * value wrong and apparently reports msecs rather than usecs. So we
-	 * fix up the resulting too-small TPM_SHORT value to make things work.
-	 */
-	if (chip->vendor.duration[TPM_SHORT] < (HZ/100))
-		chip->vendor.duration[TPM_SHORT] = HZ;
-
 	chip->vendor.duration[TPM_MEDIUM] =
 	    usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
 	chip->vendor.duration[TPM_LONG] =
 	    usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
+
+	/* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
+	 * value wrong and apparently reports msecs rather than usecs. So we
+	 * fix up the resulting too-small TPM_SHORT value to make things work.
+	 * We also scale the TPM_MEDIUM and TPM_LONG values by 1000.
+	 */
+	if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
+		chip->vendor.duration[TPM_SHORT] = HZ;
+		chip->vendor.duration[TPM_MEDIUM] *= 1000;
+		chip->vendor.duration[TPM_LONG] *= 1000;
+		chip->vendor.duration_adjusted = true;
+		dev_info(chip->dev, "Adjusting TPM timeout parameters.");
+	}
 }
 EXPORT_SYMBOL_GPL(tpm_get_timeouts);
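The reworked checks above validate the full getcap reply size: a 10-byte
output header plus the 4-byte payload-length field plus the values
themselves, i.e. 30 bytes for the four timeouts and 26 for the three
durations. The millisecond heuristic can be read in isolation as the
following sketch (hypothetical helper, not part of the patch; a zero
value still means "keep the driver default" at the call site):

	static unsigned long tpm_cap_usecs(u32 reported, bool *adjusted)
	{
		/* Values below 1000 are assumed to come from broken
		 * firmware reporting milliseconds, not microseconds. */
		if (reported && reported < 1000) {
			*adjusted = true;
			reported *= 1000;
		}
		return usecs_to_jiffies(reported);
	}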
 
@@ -600,7 +615,7 @@
 	u8 data[] = {
 		0, 193,			/* TPM_TAG_RQU_COMMAND */
 		0, 0, 0, 10,		/* length */
-		0, 0, 0, 83,		/* TPM_ORD_GetCapability */
+		0, 0, 0, 83,		/* TPM_ORD_ContinueSelfTest */
 	};
 
 	tpm_transmit(chip, data, sizeof(data));
@@ -863,18 +878,24 @@
 	data = tpm_cmd.params.readpubek_out_buffer;
 	str +=
 	    sprintf(str,
-		    "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
-		    "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
-		    " %02X %02X %02X %02X %02X %02X %02X %02X\n"
-		    "Modulus length: %d\nModulus: \n",
-		    data[10], data[11], data[12], data[13], data[14],
-		    data[15], data[16], data[17], data[22], data[23],
-		    data[24], data[25], data[26], data[27], data[28],
-		    data[29], data[30], data[31], data[32], data[33],
-		    be32_to_cpu(*((__be32 *) (data + 34))));
+		    "Algorithm: %02X %02X %02X %02X\n"
+		    "Encscheme: %02X %02X\n"
+		    "Sigscheme: %02X %02X\n"
+		    "Parameters: %02X %02X %02X %02X "
+		    "%02X %02X %02X %02X "
+		    "%02X %02X %02X %02X\n"
+		    "Modulus length: %d\n"
+		    "Modulus:\n",
+		    data[0], data[1], data[2], data[3],
+		    data[4], data[5],
+		    data[6], data[7],
+		    data[12], data[13], data[14], data[15],
+		    data[16], data[17], data[18], data[19],
+		    data[20], data[21], data[22], data[23],
+		    be32_to_cpu(*((__be32 *) (data + 24))));
 
 	for (i = 0; i < 256; i++) {
-		str += sprintf(str, "%02X ", data[i + 38]);
+		str += sprintf(str, "%02X ", data[i + 28]);
 		if ((i + 1) % 16 == 0)
 			str += sprintf(str, "\n");
 	}
@@ -937,6 +958,35 @@
 }
 EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
 
+ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d %d %d [%s]\n",
+		       jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
+		       jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
+		       jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
+		       chip->vendor.duration_adjusted
+		       ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_durations);
+
+ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d %d %d %d [%s]\n",
+		       jiffies_to_usecs(chip->vendor.timeout_a),
+		       jiffies_to_usecs(chip->vendor.timeout_b),
+		       jiffies_to_usecs(chip->vendor.timeout_c),
+		       jiffies_to_usecs(chip->vendor.timeout_d),
+		       chip->vendor.timeout_adjusted
+		       ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_timeouts);
+
 ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
 			const char *buf, size_t count)
 {
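Both helpers print microseconds plus whether the values were adjusted.
Once tpm_tis.c (below) exposes them as the "durations" and "timeouts"
device attributes, they can be read from userspace; illustrative output,
assuming the TIS driver defaults and no adjustment:

	$ cat /sys/devices/.../timeouts
	750000 2000000 750000 750000 [original]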
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 72ddb03..9c4163c 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -56,6 +56,10 @@
 				char *);
 extern ssize_t tpm_show_temp_deactivated(struct device *,
 					 struct device_attribute *attr, char *);
+extern ssize_t tpm_show_durations(struct device *,
+				  struct device_attribute *attr, char *);
+extern ssize_t tpm_show_timeouts(struct device *,
+				 struct device_attribute *attr, char *);
 
 struct tpm_chip;
 
@@ -67,6 +71,7 @@
 	unsigned long base;		/* TPM base address */
 
 	int irq;
+	int probed_irq;
 
 	int region_size;
 	int have_region;
@@ -81,7 +86,9 @@
 	struct list_head list;
 	int locality;
 	unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */
+	bool timeout_adjusted;
 	unsigned long duration[3]; /* jiffies */
+	bool duration_adjusted;
 
 	wait_queue_head_t read_queue;
 	wait_queue_head_t int_queue;
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index a605cb7..82facc9 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -330,12 +330,12 @@
 	pdev->dev.driver = &nsc_drv.driver;
 	pdev->dev.release = tpm_nsc_remove;
 
-	if ((rc = platform_device_register(pdev)) < 0)
-		goto err_free_dev;
+	if ((rc = platform_device_add(pdev)) < 0)
+		goto err_put_dev;
 
 	if (request_region(base, 2, "tpm_nsc0") == NULL ) {
 		rc = -EBUSY;
-		goto err_unreg_dev;
+		goto err_del_dev;
 	}
 
 	if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) {
@@ -382,10 +382,10 @@
 
 err_rel_reg:
 	release_region(base, 2);
-err_unreg_dev:
-	platform_device_unregister(pdev);
-err_free_dev:
-	kfree(pdev);
+err_del_dev:
+	platform_device_del(pdev);
+err_put_dev:
+	platform_device_put(pdev);
 err_unreg_drv:
 	platform_driver_unregister(&nsc_drv);
 	return rc;
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index dd21df5..7fc2f10 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/wait.h>
 #include <linux/acpi.h>
+#include <linux/freezer.h>
 #include "tpm.h"
 
 #define TPM_HEADER_SIZE 10
@@ -79,7 +80,7 @@
 static LIST_HEAD(tis_chips);
 static DEFINE_SPINLOCK(tis_lock);
 
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_PNP
 static int is_itpm(struct pnp_dev *dev)
 {
 	struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -92,11 +93,6 @@
 
 	return 0;
 }
-#else
-static int is_itpm(struct pnp_dev *dev)
-{
-	return 0;
-}
 #endif
 
 static int check_locality(struct tpm_chip *chip, int l)
@@ -120,7 +116,7 @@
 
 static int request_locality(struct tpm_chip *chip, int l)
 {
-	unsigned long stop;
+	unsigned long stop, timeout;
 	long rc;
 
 	if (check_locality(chip, l) >= 0)
@@ -129,17 +125,25 @@
 	iowrite8(TPM_ACCESS_REQUEST_USE,
 		 chip->vendor.iobase + TPM_ACCESS(l));
 
+	stop = jiffies + chip->vendor.timeout_a;
+
 	if (chip->vendor.irq) {
+again:
+		timeout = stop - jiffies;
+		if ((long)timeout <= 0)
+			return -1;
 		rc = wait_event_interruptible_timeout(chip->vendor.int_queue,
 						      (check_locality
 						       (chip, l) >= 0),
-						      chip->vendor.timeout_a);
+						      timeout);
 		if (rc > 0)
 			return l;
-
+		if (rc == -ERESTARTSYS && freezing(current)) {
+			clear_thread_flag(TIF_SIGPENDING);
+			goto again;
+		}
 	} else {
 		/* wait for burstcount */
-		stop = jiffies + chip->vendor.timeout_a;
 		do {
 			if (check_locality(chip, l) >= 0)
 				return l;
@@ -196,15 +200,24 @@
 	if ((status & mask) == mask)
 		return 0;
 
+	stop = jiffies + timeout;
+
 	if (chip->vendor.irq) {
+again:
+		timeout = stop - jiffies;
+		if ((long)timeout <= 0)
+			return -ETIME;
 		rc = wait_event_interruptible_timeout(*queue,
 						      ((tpm_tis_status
 							(chip) & mask) ==
 						       mask), timeout);
 		if (rc > 0)
 			return 0;
+		if (rc == -ERESTARTSYS && freezing(current)) {
+			clear_thread_flag(TIF_SIGPENDING);
+			goto again;
+		}
 	} else {
-		stop = jiffies + timeout;
 		do {
 			msleep(TPM_TIMEOUT);
 			status = tpm_tis_status(chip);
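Both request_locality() and wait_for_stat() now share the same
freezer-aware wait shape: the deadline is computed once, the remaining
time is recomputed on every pass, and a wakeup caused by the freezer's
fake signal retries without extending the deadline. Reduced to its core
(hypothetical wait condition "cond"):

	stop = jiffies + total_timeout;
again:
	timeout = stop - jiffies;
	if ((long)timeout <= 0)
		return -ETIME;
	rc = wait_event_interruptible_timeout(queue, cond, timeout);
	if (rc > 0)
		return 0;
	if (rc == -ERESTARTSYS && freezing(current)) {
		clear_thread_flag(TIF_SIGPENDING);
		goto again;
	}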
@@ -288,11 +301,10 @@
  * tpm.c can skip polling for the data to be available as the interrupt is
  * waited for here
  */
-static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
+static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len)
 {
 	int rc, status, burstcnt;
 	size_t count = 0;
-	u32 ordinal;
 
 	if (request_locality(chip, 0) < 0)
 		return -EBUSY;
@@ -327,8 +339,7 @@
 
 	/* write last byte */
 	iowrite8(buf[count],
-		 chip->vendor.iobase +
-		 TPM_DATA_FIFO(chip->vendor.locality));
+		 chip->vendor.iobase + TPM_DATA_FIFO(chip->vendor.locality));
 	wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c,
 		      &chip->vendor.int_queue);
 	status = tpm_tis_status(chip);
@@ -337,6 +348,28 @@
 		goto out_err;
 	}
 
+	return 0;
+
+out_err:
+	tpm_tis_ready(chip);
+	release_locality(chip, chip->vendor.locality, 0);
+	return rc;
+}
+
+/*
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ */
+static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+	int rc;
+	u32 ordinal;
+
+	rc = tpm_tis_send_data(chip, buf, len);
+	if (rc < 0)
+		return rc;
+
 	/* go and do it */
 	iowrite8(TPM_STS_GO,
 		 chip->vendor.iobase + TPM_STS(chip->vendor.locality));
@@ -358,6 +391,47 @@
 	return rc;
 }
 
+/*
+ * Early probing for iTPM with STS_DATA_EXPECT flaw.
+ * Try sending command without itpm flag set and if that
+ * fails, repeat with itpm flag set.
+ */
+static int probe_itpm(struct tpm_chip *chip)
+{
+	int rc = 0;
+	u8 cmd_getticks[] = {
+		0x00, 0xc1, 0x00, 0x00, 0x00, 0x0a,
+		0x00, 0x00, 0x00, 0xf1
+	};
+	size_t len = sizeof(cmd_getticks);
+	int rem_itpm = itpm;
+
+	itpm = 0;
+
+	rc = tpm_tis_send_data(chip, cmd_getticks, len);
+	if (rc == 0)
+		goto out;
+
+	tpm_tis_ready(chip);
+	release_locality(chip, chip->vendor.locality, 0);
+
+	itpm = 1;
+
+	rc = tpm_tis_send_data(chip, cmd_getticks, len);
+	if (rc == 0) {
+		dev_info(chip->dev, "Detected an iTPM.\n");
+		rc = 1;
+	} else
+		rc = -EFAULT;
+
+out:
+	itpm = rem_itpm;
+	tpm_tis_ready(chip);
+	release_locality(chip, chip->vendor.locality, 0);
+
+	return rc;
+}
+
 static const struct file_operations tis_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
@@ -376,6 +450,8 @@
 		   NULL);
 static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
 
 static struct attribute *tis_attrs[] = {
 	&dev_attr_pubek.attr,
@@ -385,7 +461,9 @@
 	&dev_attr_owned.attr,
 	&dev_attr_temp_deactivated.attr,
 	&dev_attr_caps.attr,
-	&dev_attr_cancel.attr, NULL,
+	&dev_attr_cancel.attr,
+	&dev_attr_durations.attr,
+	&dev_attr_timeouts.attr, NULL,
 };
 
 static struct attribute_group tis_attr_grp = {
@@ -416,7 +494,7 @@
 	if (interrupt == 0)
 		return IRQ_NONE;
 
-	chip->vendor.irq = irq;
+	chip->vendor.probed_irq = irq;
 
 	/* Clear interrupts handled with TPM_EOI */
 	iowrite32(interrupt,
@@ -464,7 +542,7 @@
 			resource_size_t len, unsigned int irq)
 {
 	u32 vendor, intfcaps, intmask;
-	int rc, i;
+	int rc, i, irq_s, irq_e;
 	struct tpm_chip *chip;
 
 	if (!(chip = tpm_register_hardware(dev, &tpm_tis)))
@@ -493,6 +571,14 @@
 		 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
 		 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
 
+	if (!itpm) {
+		itpm = probe_itpm(chip);
+		if (itpm < 0) {
+			rc = -ENODEV;
+			goto out_err;
+		}
+	}
+
 	if (itpm)
 		dev_info(dev, "Intel iTPM workaround enabled\n");
 
@@ -522,6 +608,9 @@
 	if (intfcaps & TPM_INTF_DATA_AVAIL_INT)
 		dev_dbg(dev, "\tData Avail Int Support\n");
 
+	/* get the timeouts before testing for irqs */
+	tpm_get_timeouts(chip);
+
 	/* INTERRUPT Setup */
 	init_waitqueue_head(&chip->vendor.read_queue);
 	init_waitqueue_head(&chip->vendor.int_queue);
@@ -540,13 +629,19 @@
 	if (interrupts)
 		chip->vendor.irq = irq;
 	if (interrupts && !chip->vendor.irq) {
-		chip->vendor.irq =
+		irq_s =
 		    ioread8(chip->vendor.iobase +
 			    TPM_INT_VECTOR(chip->vendor.locality));
+		if (irq_s) {
+			irq_e = irq_s;
+		} else {
+			irq_s = 3;
+			irq_e = 15;
+		}
 
-		for (i = 3; i < 16 && chip->vendor.irq == 0; i++) {
+		for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) {
 			iowrite8(i, chip->vendor.iobase +
-				    TPM_INT_VECTOR(chip->vendor.locality));
+				 TPM_INT_VECTOR(chip->vendor.locality));
 			if (request_irq
 			    (i, tis_int_probe, IRQF_SHARED,
 			     chip->vendor.miscdev.name, chip) != 0) {
@@ -568,9 +663,22 @@
 				  chip->vendor.iobase +
 				  TPM_INT_ENABLE(chip->vendor.locality));
 
+			chip->vendor.probed_irq = 0;
+
 			/* Generate Interrupts */
 			tpm_gen_interrupt(chip);
 
+			chip->vendor.irq = chip->vendor.probed_irq;
+
+			/* free_irq will call into tis_int_probe;
+			   clear all irqs we haven't seen while doing
+			   tpm_gen_interrupt */
+			iowrite32(ioread32
+				  (chip->vendor.iobase +
+				   TPM_INT_STATUS(chip->vendor.locality)),
+				  chip->vendor.iobase +
+				  TPM_INT_STATUS(chip->vendor.locality));
+
 			/* Turn off */
 			iowrite32(intmask,
 				  chip->vendor.iobase +
@@ -609,7 +717,6 @@
 	list_add(&chip->vendor.list, &tis_chips);
 	spin_unlock(&tis_lock);
 
-	tpm_get_timeouts(chip);
 	tpm_continue_selftest(chip);
 
 	return 0;
@@ -619,6 +726,29 @@
 	tpm_remove_hardware(chip->dev);
 	return rc;
 }
+
+static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
+{
+	u32 intmask;
+
+	/* reenable interrupts that device may have lost or
+	   BIOS/firmware may have disabled */
+	iowrite8(chip->vendor.irq, chip->vendor.iobase +
+		 TPM_INT_VECTOR(chip->vendor.locality));
+
+	intmask =
+	    ioread32(chip->vendor.iobase +
+		     TPM_INT_ENABLE(chip->vendor.locality));
+
+	intmask |= TPM_INTF_CMD_READY_INT
+	    | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT
+	    | TPM_INTF_STS_VALID_INT | TPM_GLOBAL_INT_ENABLE;
+
+	iowrite32(intmask,
+		  chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality));
+}
+
+
 #ifdef CONFIG_PNP
 static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
 				      const struct pnp_device_id *pnp_id)
@@ -650,6 +780,9 @@
 	struct tpm_chip *chip = pnp_get_drvdata(dev);
 	int ret;
 
+	if (chip->vendor.irq)
+		tpm_tis_reenable_interrupts(chip);
+
 	ret = tpm_pm_resume(&dev->dev);
 	if (!ret)
 		tpm_continue_selftest(chip);
@@ -702,6 +835,11 @@
 
 static int tpm_tis_resume(struct platform_device *dev)
 {
+	struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
+
+	if (chip->vendor.irq)
+		tpm_tis_reenable_interrupts(chip);
+
 	return tpm_pm_resume(&dev->dev);
 }
 static struct platform_driver tis_drv = {
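The IRQ autoprobe above no longer lets the interrupt handler write
vendor.irq directly: tis_int_probe() records the vector that fired in
probed_irq, and the prober commits it only after generating the test
interrupt, then acknowledges any pending status before free_irq() so the
handler is not re-entered on teardown. The handshake, condensed:

	chip->vendor.probed_irq = 0;
	tpm_gen_interrupt(chip);                    /* tis_int_probe() may run */
	chip->vendor.irq = chip->vendor.probed_irq; /* still 0 if nothing fired */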
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 2e5b204..d0183dd 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1,6 +1,6 @@
 /* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support.
  *
- * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2010, 2011 David S. Miller <davem@davemloft.net>
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -31,8 +31,8 @@
 #include "n2_core.h"
 
 #define DRV_MODULE_NAME		"n2_crypto"
-#define DRV_MODULE_VERSION	"0.1"
-#define DRV_MODULE_RELDATE	"April 29, 2010"
+#define DRV_MODULE_VERSION	"0.2"
+#define DRV_MODULE_RELDATE	"July 28, 2011"
 
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -1823,22 +1823,17 @@
 static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node,
 				   struct spu_mdesc_info *ip)
 {
-	const u64 *intr, *ino;
-	int intr_len, ino_len;
+	const u64 *ino;
+	int ino_len;
 	int i;
 
-	intr = mdesc_get_property(mdesc, node, "intr", &intr_len);
-	if (!intr)
-		return -ENODEV;
-
 	ino = mdesc_get_property(mdesc, node, "ino", &ino_len);
-	if (!ino)
+	if (!ino) {
+		printk("NO 'ino'\n");
 		return -ENODEV;
+	}
 
-	if (intr_len != ino_len)
-		return -EINVAL;
-
-	ip->num_intrs = intr_len / sizeof(u64);
+	ip->num_intrs = ino_len / sizeof(u64);
 	ip->ino_table = kzalloc((sizeof(struct ino_blob) *
 				 ip->num_intrs),
 				GFP_KERNEL);
@@ -1847,7 +1842,7 @@
 
 	for (i = 0; i < ip->num_intrs; i++) {
 		struct ino_blob *b = &ip->ino_table[i];
-		b->intr = intr[i];
+		b->intr = i + 1;
 		b->ino = ino[i];
 	}
 
@@ -2204,6 +2199,10 @@
 		.name = "n2cp",
 		.compatible = "SUNW,vf-cwq",
 	},
+	{
+		.name = "n2cp",
+		.compatible = "SUNW,kt-cwq",
+	},
 	{},
 };
 
@@ -2228,6 +2227,10 @@
 		.name = "ncp",
 		.compatible = "SUNW,vf-mau",
 	},
+	{
+		.name = "ncp",
+		.compatible = "SUNW,kt-mau",
+	},
 	{},
 };
 
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 95a08a8..5745b7f 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -271,7 +271,7 @@
 	unsigned long edtl;
 	int err;
 	struct iser_data_buf *data_buf;
-	struct iscsi_cmd *hdr =  (struct iscsi_cmd *)task->hdr;
+	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 	struct scsi_cmnd *sc  =  task->sc;
 	struct iser_tx_desc *tx_desc = &iser_task->desc;
 
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 56abf3d..d728875 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -154,10 +154,13 @@
 	{ 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
 	{ 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX },
 	{ 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+	{ 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
+	{ 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 },
 	{ 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+	{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
 	{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
 	{ 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
@@ -236,9 +239,10 @@
 	XPAD_XBOX360_VENDOR(0x046d),		/* Logitech X-Box 360 style controllers */
 	XPAD_XBOX360_VENDOR(0x0738),		/* Mad Catz X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x0e6f),		/* 0x0e6f X-Box 360 controllers */
+	XPAD_XBOX360_VENDOR(0x12ab),		/* X-Box 360 dance pads */
 	XPAD_XBOX360_VENDOR(0x1430),		/* RedOctane X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x146b),		/* BigBen Interactive Controllers */
-	XPAD_XBOX360_VENDOR(0x1bad),		/* Rock Band Drums */
+	XPAD_XBOX360_VENDOR(0x1bad),		/* Harmonix Rock Band Guitar and Drums */
 	XPAD_XBOX360_VENDOR(0x0f0d),            /* Hori Controllers */
 	{ }
 };
@@ -545,7 +549,7 @@
 	struct usb_endpoint_descriptor *ep_irq_out;
 	int error;
 
-	if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX)
+	if (xpad->xtype == XTYPE_UNKNOWN)
 		return 0;
 
 	xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -579,13 +583,13 @@
 
 static void xpad_stop_output(struct usb_xpad *xpad)
 {
-	if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX)
+	if (xpad->xtype != XTYPE_UNKNOWN)
 		usb_kill_urb(xpad->irq_out);
 }
 
 static void xpad_deinit_output(struct usb_xpad *xpad)
 {
-	if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) {
+	if (xpad->xtype != XTYPE_UNKNOWN) {
 		usb_free_urb(xpad->irq_out);
 		usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
 				xpad->odata, xpad->odata_dma);
@@ -632,6 +636,23 @@
 
 			return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
 
+		case XTYPE_XBOX360W:
+			xpad->odata[0] = 0x00;
+			xpad->odata[1] = 0x01;
+			xpad->odata[2] = 0x0F;
+			xpad->odata[3] = 0xC0;
+			xpad->odata[4] = 0x00;
+			xpad->odata[5] = strong / 256;
+			xpad->odata[6] = weak / 256;
+			xpad->odata[7] = 0x00;
+			xpad->odata[8] = 0x00;
+			xpad->odata[9] = 0x00;
+			xpad->odata[10] = 0x00;
+			xpad->odata[11] = 0x00;
+			xpad->irq_out->transfer_buffer_length = 12;
+
+			return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+
 		default:
 			dbg("%s - rumble command sent to unsupported xpad type: %d",
 				__func__, xpad->xtype);
@@ -644,7 +665,7 @@
 
 static int xpad_init_ff(struct usb_xpad *xpad)
 {
-	if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX)
+	if (xpad->xtype == XTYPE_UNKNOWN)
 		return 0;
 
 	input_set_capability(xpad->dev, EV_FF, FF_RUMBLE);
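In the new wireless-pad packet, strong/256 and weak/256 fold the force
feedback core's 16-bit rumble magnitudes into the single byte per motor
that the protocol carries; for example (values illustrative):

	__u16 strong = 0xc000;      /* from struct ff_rumble_effect */
	__u8 level = strong / 256;  /* 0xc0 on the wire */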
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index af45d27..7b404e5 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 6315986..c770826 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 11478eb..19cfc0c 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -1578,14 +1578,14 @@
 	atkbd_platform_fixup = atkbd_apply_forced_release_keylist;
 	atkbd_platform_fixup_data = id->driver_data;
 
-	return 0;
+	return 1;
 }
 
 static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id)
 {
 	atkbd_platform_scancode_fixup = id->driver_data;
 
-	return 0;
+	return 1;
 }
 
 static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = {
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 6e6145b..ce281d1 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -2,6 +2,7 @@
  * Driver for keys on GPIO lines capable of generating interrupts.
  *
  * Copyright 2005 Phil Blundell
+ * Copyright 2010, 2011 David Jander <david@protonic.nl>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -25,6 +26,8 @@
 #include <linux/gpio_keys.h>
 #include <linux/workqueue.h>
 #include <linux/gpio.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
 
 struct gpio_button_data {
 	struct gpio_keys_button *button;
@@ -415,7 +418,7 @@
 	if (!button->can_disable)
 		irqflags |= IRQF_SHARED;
 
-	error = request_any_context_irq(irq, gpio_keys_isr, irqflags, desc, bdata);
+	error = request_threaded_irq(irq, NULL, gpio_keys_isr, irqflags, desc, bdata);
 	if (error < 0) {
 		dev_err(dev, "Unable to claim irq %d; error %d\n",
 			irq, error);
@@ -445,15 +448,120 @@
 		ddata->disable(input->dev.parent);
 }
 
+/*
+ * Handlers for alternative sources of platform_data
+ */
+#ifdef CONFIG_OF
+/*
+ * Translate OpenFirmware node properties into platform_data
+ */
+static int gpio_keys_get_devtree_pdata(struct device *dev,
+			    struct gpio_keys_platform_data *pdata)
+{
+	struct device_node *node, *pp;
+	int i;
+	struct gpio_keys_button *buttons;
+	const u32 *reg;
+	int len;
+
+	node = dev->of_node;
+	if (node == NULL)
+		return -ENODEV;
+
+	memset(pdata, 0, sizeof *pdata);
+
+	pdata->rep = !!of_get_property(node, "autorepeat", &len);
+
+	/* First count the subnodes */
+	pdata->nbuttons = 0;
+	pp = NULL;
+	while ((pp = of_get_next_child(node, pp)))
+		pdata->nbuttons++;
+
+	if (pdata->nbuttons == 0)
+		return -ENODEV;
+
+	buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL);
+	if (!buttons)
+		return -ENOMEM;
+
+	pp = NULL;
+	i = 0;
+	while ((pp = of_get_next_child(node, pp))) {
+		enum of_gpio_flags flags;
+
+		if (!of_find_property(pp, "gpios", NULL)) {
+			pdata->nbuttons--;
+			dev_warn(dev, "Found button without gpios\n");
+			continue;
+		}
+		buttons[i].gpio = of_get_gpio_flags(pp, 0, &flags);
+		buttons[i].active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+		reg = of_get_property(pp, "linux,code", &len);
+		if (!reg) {
+			dev_err(dev, "Button without keycode: 0x%x\n", buttons[i].gpio);
+			goto out_fail;
+		}
+		buttons[i].code = be32_to_cpup(reg);
+
+		buttons[i].desc = of_get_property(pp, "label", &len);
+
+		reg = of_get_property(pp, "linux,input-type", &len);
+		buttons[i].type = reg ? be32_to_cpup(reg) : EV_KEY;
+
+		buttons[i].wakeup = !!of_get_property(pp, "gpio-key,wakeup", NULL);
+
+		reg = of_get_property(pp, "debounce-interval", &len);
+		buttons[i].debounce_interval = reg ? be32_to_cpup(reg) : 5;
+
+		i++;
+	}
+
+	pdata->buttons = buttons;
+
+	return 0;
+
+out_fail:
+	kfree(buttons);
+	return -ENODEV;
+}
+
+static struct of_device_id gpio_keys_of_match[] = {
+	{ .compatible = "gpio-keys", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpio_keys_of_match);
+
+#else
+
+static int gpio_keys_get_devtree_pdata(struct device *dev,
+			    struct gpio_keys_platform_data *altp)
+{
+	return -ENODEV;
+}
+
+#define gpio_keys_of_match NULL
+
+#endif
+
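The per-button loop leans on of_get_gpio_flags() to resolve the GPIO
number and its polarity from the "gpios" property in one call; the idiom
in isolation (hypothetical node pointer pp):

	enum of_gpio_flags flags;
	int gpio = of_get_gpio_flags(pp, 0, &flags);
	bool active_low = flags & OF_GPIO_ACTIVE_LOW;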
 static int __devinit gpio_keys_probe(struct platform_device *pdev)
 {
 	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
 	struct gpio_keys_drvdata *ddata;
 	struct device *dev = &pdev->dev;
+	struct gpio_keys_platform_data alt_pdata;
 	struct input_dev *input;
 	int i, error;
 	int wakeup = 0;
 
+	if (!pdata) {
+		error = gpio_keys_get_devtree_pdata(dev, &alt_pdata);
+		if (error)
+			return error;
+		pdata = &alt_pdata;
+	}
+
 	ddata = kzalloc(sizeof(struct gpio_keys_drvdata) +
 			pdata->nbuttons * sizeof(struct gpio_button_data),
 			GFP_KERNEL);
@@ -544,13 +652,15 @@
  fail1:
 	input_free_device(input);
 	kfree(ddata);
+	/* If we have no platform_data, we allocated buttons dynamically. */
+	if (!pdev->dev.platform_data)
+		kfree(pdata->buttons);
 
 	return error;
 }
 
 static int __devexit gpio_keys_remove(struct platform_device *pdev)
 {
-	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
 	struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
 	struct input_dev *input = ddata->input;
 	int i;
@@ -559,31 +669,39 @@
 
 	device_init_wakeup(&pdev->dev, 0);
 
-	for (i = 0; i < pdata->nbuttons; i++) {
-		int irq = gpio_to_irq(pdata->buttons[i].gpio);
+	for (i = 0; i < ddata->n_buttons; i++) {
+		int irq = gpio_to_irq(ddata->data[i].button->gpio);
 		free_irq(irq, &ddata->data[i]);
 		if (ddata->data[i].timer_debounce)
 			del_timer_sync(&ddata->data[i].timer);
 		cancel_work_sync(&ddata->data[i].work);
-		gpio_free(pdata->buttons[i].gpio);
+		gpio_free(ddata->data[i].button->gpio);
 	}
 
 	input_unregister_device(input);
 
+	/*
+	 * If we had no platform_data, we allocated buttons dynamically, and
+	 * must free them here. ddata->data[0].button is the pointer to the
+	 * beginning of the allocated array.
+	 */
+	if (!pdev->dev.platform_data)
+		kfree(ddata->data[0].button);
+
+	kfree(ddata);
+
 	return 0;
 }
 
-
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int gpio_keys_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+	struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
 	int i;
 
-	if (device_may_wakeup(&pdev->dev)) {
-		for (i = 0; i < pdata->nbuttons; i++) {
-			struct gpio_keys_button *button = &pdata->buttons[i];
+	if (device_may_wakeup(dev)) {
+		for (i = 0; i < ddata->n_buttons; i++) {
+			struct gpio_keys_button *button = ddata->data[i].button;
 			if (button->wakeup) {
 				int irq = gpio_to_irq(button->gpio);
 				enable_irq_wake(irq);
@@ -596,15 +714,13 @@
 
 static int gpio_keys_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
-	struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+	struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
 	int i;
 
-	for (i = 0; i < pdata->nbuttons; i++) {
+	for (i = 0; i < ddata->n_buttons; i++) {
 
-		struct gpio_keys_button *button = &pdata->buttons[i];
-		if (button->wakeup && device_may_wakeup(&pdev->dev)) {
+		struct gpio_keys_button *button = ddata->data[i].button;
+		if (button->wakeup && device_may_wakeup(dev)) {
 			int irq = gpio_to_irq(button->gpio);
 			disable_irq_wake(irq);
 		}
@@ -615,22 +731,18 @@
 
 	return 0;
 }
-
-static const struct dev_pm_ops gpio_keys_pm_ops = {
-	.suspend	= gpio_keys_suspend,
-	.resume		= gpio_keys_resume,
-};
 #endif
 
+static SIMPLE_DEV_PM_OPS(gpio_keys_pm_ops, gpio_keys_suspend, gpio_keys_resume);
+
 static struct platform_driver gpio_keys_device_driver = {
 	.probe		= gpio_keys_probe,
 	.remove		= __devexit_p(gpio_keys_remove),
 	.driver		= {
 		.name	= "gpio-keys",
 		.owner	= THIS_MODULE,
-#ifdef CONFIG_PM
 		.pm	= &gpio_keys_pm_ops,
-#endif
+		.of_match_table = gpio_keys_of_match,
 	}
 };
 
@@ -644,10 +756,10 @@
 	platform_driver_unregister(&gpio_keys_device_driver);
 }
 
-module_init(gpio_keys_init);
+late_initcall(gpio_keys_init);
 module_exit(gpio_keys_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Phil Blundell <pb@handhelds.org>");
-MODULE_DESCRIPTION("Keyboard driver for CPU GPIOs");
+MODULE_DESCRIPTION("Keyboard driver for GPIOs");
 MODULE_ALIAS("platform:gpio-keys");
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index 71f744a8..ab0acaf 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -146,7 +146,6 @@
 	/* device lock */
 	struct mutex		lock;
 	struct i2c_client	*client;
-	struct work_struct	work;
 	struct input_dev	*idev;
 	bool			kp_enabled;
 	bool			pm_suspend;
@@ -162,7 +161,6 @@
 
 #define client_to_lm8323(c)	container_of(c, struct lm8323_chip, client)
 #define dev_to_lm8323(d)	container_of(d, struct lm8323_chip, client->dev)
-#define work_to_lm8323(w)	container_of(w, struct lm8323_chip, work)
 #define cdev_to_pwm(c)		container_of(c, struct lm8323_pwm, cdev)
 #define work_to_pwm(w)		container_of(w, struct lm8323_pwm, work)
 
@@ -375,9 +373,9 @@
  * Bottom half: handle the interrupt by posting key events, or dealing with
  * errors appropriately.
  */
-static void lm8323_work(struct work_struct *work)
+static irqreturn_t lm8323_irq(int irq, void *_lm)
 {
-	struct lm8323_chip *lm = work_to_lm8323(work);
+	struct lm8323_chip *lm = _lm;
 	u8 ints;
 	int i;
 
@@ -409,16 +407,6 @@
 	}
 
 	mutex_unlock(&lm->lock);
-}
-
-/*
- * We cannot use I2C in interrupt context, so we just schedule work.
- */
-static irqreturn_t lm8323_irq(int irq, void *data)
-{
-	struct lm8323_chip *lm = data;
-
-	schedule_work(&lm->work);
 
 	return IRQ_HANDLED;
 }
@@ -675,7 +663,6 @@
 	lm->client = client;
 	lm->idev = idev;
 	mutex_init(&lm->lock);
-	INIT_WORK(&lm->work, lm8323_work);
 
 	lm->size_x = pdata->size_x;
 	lm->size_y = pdata->size_y;
@@ -746,9 +733,8 @@
 		goto fail3;
 	}
 
-	err = request_irq(client->irq, lm8323_irq,
-			  IRQF_TRIGGER_FALLING | IRQF_DISABLED,
-			  "lm8323", lm);
+	err = request_threaded_irq(client->irq, NULL, lm8323_irq,
+			  IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm);
 	if (err) {
 		dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
 		goto fail4;
@@ -783,7 +769,6 @@
 
 	disable_irq_wake(client->irq);
 	free_irq(client->irq, lm);
-	cancel_work_sync(&lm->work);
 
 	input_unregister_device(lm->idev);
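The lm8323 conversion is the standard workqueue-to-threaded-IRQ pattern
for I2C devices: a NULL primary handler, a thread handler that may sleep
and do the bus traffic, and IRQF_ONESHOT to keep the level-triggered line
masked until the thread returns. The minimal shape (hypothetical names):

	err = request_threaded_irq(client->irq,
				   NULL,            /* no hard-irq handler */
				   foo_irq_thread,  /* may sleep, I2C here */
				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
				   "foo", foo_data);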
 
diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c
index 0a9e811..1c1615d 100644
--- a/drivers/input/keyboard/mpr121_touchkey.c
+++ b/drivers/input/keyboard/mpr121_touchkey.c
@@ -43,14 +43,15 @@
  * enabled capacitance sensing inputs and its run/suspend mode.
  */
 #define ELECTRODE_CONF_ADDR		0x5e
+#define ELECTRODE_CONF_QUICK_CHARGE	0x80
 #define AUTO_CONFIG_CTRL_ADDR		0x7b
 #define AUTO_CONFIG_USL_ADDR		0x7d
 #define AUTO_CONFIG_LSL_ADDR		0x7e
 #define AUTO_CONFIG_TL_ADDR		0x7f
 
 /* Threshold of touch/release trigger */
-#define TOUCH_THRESHOLD			0x0f
-#define RELEASE_THRESHOLD		0x0a
+#define TOUCH_THRESHOLD			0x08
+#define RELEASE_THRESHOLD		0x05
 /* Masks for touch and release triggers */
 #define TOUCH_STATUS_MASK		0xfff
 /* MPR121 has 12 keys */
@@ -127,7 +128,7 @@
 				      struct i2c_client *client)
 {
 	const struct mpr121_init_register *reg;
-	unsigned char usl, lsl, tl;
+	unsigned char usl, lsl, tl, eleconf;
 	int i, t, vdd, ret;
 
 	/* Set up touch/release threshold for ele0-ele11 */
@@ -163,8 +164,15 @@
 	ret = i2c_smbus_write_byte_data(client, AUTO_CONFIG_USL_ADDR, usl);
 	ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_LSL_ADDR, lsl);
 	ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_TL_ADDR, tl);
+
+	/*
+	 * The quick-charge bit lets the capacitance charge up to the
+	 * ready state quickly; without it, the buttons may not function
+	 * after system boot.
+	 */
+	eleconf = mpr121->keycount | ELECTRODE_CONF_QUICK_CHARGE;
 	ret |= i2c_smbus_write_byte_data(client, ELECTRODE_CONF_ADDR,
-					 mpr121->keycount);
+					 eleconf);
 	if (ret != 0)
 		goto err_i2c_write;
 
diff --git a/drivers/input/keyboard/pmic8xxx-keypad.c b/drivers/input/keyboard/pmic8xxx-keypad.c
index 6229c3e..e7cc51d 100644
--- a/drivers/input/keyboard/pmic8xxx-keypad.c
+++ b/drivers/input/keyboard/pmic8xxx-keypad.c
@@ -700,9 +700,9 @@
 	return 0;
 
 err_pmic_reg_read:
-	free_irq(kp->key_stuck_irq, NULL);
+	free_irq(kp->key_stuck_irq, kp);
 err_req_stuck_irq:
-	free_irq(kp->key_sense_irq, NULL);
+	free_irq(kp->key_sense_irq, kp);
 err_gpio_config:
 err_get_irq:
 	input_free_device(kp->input);
@@ -717,8 +717,8 @@
 	struct pmic8xxx_kp *kp = platform_get_drvdata(pdev);
 
 	device_init_wakeup(&pdev->dev, 0);
-	free_irq(kp->key_stuck_irq, NULL);
-	free_irq(kp->key_sense_irq, NULL);
+	free_irq(kp->key_stuck_irq, kp);
+	free_irq(kp->key_sense_irq, kp);
 	input_unregister_device(kp->input);
 	kfree(kp);
 
diff --git a/drivers/input/keyboard/qt1070.c b/drivers/input/keyboard/qt1070.c
index ca7b891..b21bf5b 100644
--- a/drivers/input/keyboard/qt1070.c
+++ b/drivers/input/keyboard/qt1070.c
@@ -239,8 +239,6 @@
 	input_unregister_device(data->input);
 	kfree(data);
 
-	i2c_set_clientdata(client, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index 6876700..934aeb583 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -291,7 +291,7 @@
 	return 0;
 }
 
-#if CONFIG_PM_SLEEP
+#ifdef CONFIG_PM_SLEEP
 static int sh_keysc_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index 2b3b73e..da3828f 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -657,7 +657,7 @@
 
 	input_set_drvdata(input_dev, kbc);
 
-	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
 
 	input_dev->keycode = kbc->keycode;
diff --git a/drivers/input/keyboard/tnetv107x-keypad.c b/drivers/input/keyboard/tnetv107x-keypad.c
index c8f097a..1c58681 100644
--- a/drivers/input/keyboard/tnetv107x-keypad.c
+++ b/drivers/input/keyboard/tnetv107x-keypad.c
@@ -337,5 +337,5 @@
 
 MODULE_AUTHOR("Cyril Chemparathy");
 MODULE_DESCRIPTION("TNETV107X Keypad Driver");
-MODULE_ALIAS("platform: tnetv107x-keypad");
+MODULE_ALIAS("platform:tnetv107x-keypad");
 MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index d1bf872..c9104bb 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -100,6 +100,27 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called max8925_onkey.
 
+config INPUT_MMA8450
+	tristate "MMA8450 - Freescale's 3-Axis, 8/12-bit Digital Accelerometer"
+	depends on I2C
+	select INPUT_POLLDEV
+	help
+	  Say Y here if you want to support Freescale's MMA8450 Accelerometer
+	  through the I2C interface.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mma8450.
+
+config INPUT_MPU3050
+	tristate "MPU3050 Triaxial gyroscope sensor"
+	depends on I2C
+	help
+	  Say Y here if you want to support the InvenSense MPU3050
+	  connected via an I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mpu3050.
+
 config INPUT_APANEL
 	tristate "Fujitsu Lifebook Application Panel buttons"
 	depends on X86 && I2C && LEDS_CLASS
@@ -209,6 +230,23 @@
 	  To compile this driver as a module, choose M here: the module will
 	  be called keyspan_remote.
 
+config INPUT_KXTJ9
+	tristate "Kionix KXTJ9 tri-axis digital accelerometer"
+	depends on I2C
+	help
+	  Say Y here to enable support for the Kionix KXTJ9 digital tri-axis
+	  accelerometer.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called kxtj9.
+
+config INPUT_KXTJ9_POLLED_MODE
+	bool "Enable polling mode support"
+	depends on INPUT_KXTJ9
+	select INPUT_POLLDEV
+	help
+	  Say Y here if you need the accelerometer to work in polling mode.
+
 config INPUT_POWERMATE
 	tristate "Griffin PowerMate and Contour Jog support"
 	depends on USB_ARCH_HAS_HCD
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 4da7c3a..299ad5e 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -25,8 +25,11 @@
 obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
 obj-$(CONFIG_INPUT_IXP4XX_BEEPER)	+= ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)	+= keyspan_remote.o
+obj-$(CONFIG_INPUT_KXTJ9)		+= kxtj9.o
 obj-$(CONFIG_INPUT_M68K_BEEP)		+= m68kspkr.o
 obj-$(CONFIG_INPUT_MAX8925_ONKEY)	+= max8925_onkey.o
+obj-$(CONFIG_INPUT_MMA8450)		+= mma8450.o
+obj-$(CONFIG_INPUT_MPU3050)		+= mpu3050.o
 obj-$(CONFIG_INPUT_PCAP)		+= pcap_keys.o
 obj-$(CONFIG_INPUT_PCF50633_PMU)	+= pcf50633-input.o
 obj-$(CONFIG_INPUT_PCF8574)		+= pcf8574_keypad.o
@@ -46,4 +49,3 @@
 obj-$(CONFIG_INPUT_WM831X_ON)		+= wm831x-on.o
 obj-$(CONFIG_INPUT_XEN_KBDDEV_FRONTEND)	+= xen-kbdfront.o
 obj-$(CONFIG_INPUT_YEALINK)		+= yealink.o
-
diff --git a/drivers/input/misc/bfin_rotary.c b/drivers/input/misc/bfin_rotary.c
index 4f72bdd..d00edc9 100644
--- a/drivers/input/misc/bfin_rotary.c
+++ b/drivers/input/misc/bfin_rotary.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
new file mode 100644
index 0000000..c456f63
--- /dev/null
+++ b/drivers/input/misc/kxtj9.c
@@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson <chudson@kionix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/input/kxtj9.h>
+#include <linux/input-polldev.h>
+
+#define NAME			"kxtj9"
+#define G_MAX			8000
+/* OUTPUT REGISTERS */
+#define XOUT_L			0x06
+#define WHO_AM_I		0x0F
+/* CONTROL REGISTERS */
+#define INT_REL			0x1A
+#define CTRL_REG1		0x1B
+#define INT_CTRL1		0x1E
+#define DATA_CTRL		0x21
+/* CONTROL REGISTER 1 BITS */
+#define PC1_OFF			0x7F
+#define PC1_ON			(1 << 7)
+/* Data ready function enable bit: set during probe if using irq mode */
+#define DRDYE			(1 << 5)
+/* INTERRUPT CONTROL REGISTER 1 BITS */
+/* Set these during probe if using irq mode */
+#define KXTJ9_IEL		(1 << 3)
+#define KXTJ9_IEA		(1 << 4)
+#define KXTJ9_IEN		(1 << 5)
+/* INPUT_ABS CONSTANTS */
+#define FUZZ			3
+#define FLAT			3
+/* RESUME STATE INDICES */
+#define RES_DATA_CTRL		0
+#define RES_CTRL_REG1		1
+#define RES_INT_CTRL1		2
+#define RESUME_ENTRIES		3
+
+/*
+ * The following table lists the maximum appropriate poll interval for each
+ * available output data rate.
+ */
+static const struct {
+	unsigned int cutoff;
+	u8 mask;
+} kxtj9_odr_table[] = {
+	{ 3,	ODR800F },
+	{ 5,	ODR400F },
+	{ 10,	ODR200F },
+	{ 20,	ODR100F },
+	{ 40,	ODR50F  },
+	{ 80,	ODR25F  },
+	{ 0,	ODR12_5F},
+};
+
+struct kxtj9_data {
+	struct i2c_client *client;
+	struct kxtj9_platform_data pdata;
+	struct input_dev *input_dev;
+#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE
+	struct input_polled_dev *poll_dev;
+#endif
+	unsigned int last_poll_interval;
+	u8 shift;
+	u8 ctrl_reg1;
+	u8 data_ctrl;
+	u8 int_ctrl;
+};
+
+static int kxtj9_i2c_read(struct kxtj9_data *tj9, u8 addr, u8 *data, int len)
+{
+	struct i2c_msg msgs[] = {
+		{
+			.addr = tj9->client->addr,
+			.flags = tj9->client->flags,
+			.len = 1,
+			.buf = &addr,
+		},
+		{
+			.addr = tj9->client->addr,
+			.flags = tj9->client->flags | I2C_M_RD,
+			.len = len,
+			.buf = data,
+		},
+	};
+
+	return i2c_transfer(tj9->client->adapter, msgs, 2);
+}
+
+static void kxtj9_report_acceleration_data(struct kxtj9_data *tj9)
+{
+	s16 acc_data[3]; /* Data bytes from hardware xL, xH, yL, yH, zL, zH */
+	s16 x, y, z;
+	int err;
+
+	err = kxtj9_i2c_read(tj9, XOUT_L, (u8 *)acc_data, 6);
+	if (err < 0) {
+		dev_err(&tj9->client->dev, "accelerometer data read failed\n");
+		return;
+	}
+
+	x = le16_to_cpu(acc_data[tj9->pdata.axis_map_x]) >> tj9->shift;
+	y = le16_to_cpu(acc_data[tj9->pdata.axis_map_y]) >> tj9->shift;
+	z = le16_to_cpu(acc_data[tj9->pdata.axis_map_z]) >> tj9->shift;
+
+	input_report_abs(tj9->input_dev, ABS_X, tj9->pdata.negate_x ? -x : x);
+	input_report_abs(tj9->input_dev, ABS_Y, tj9->pdata.negate_y ? -y : y);
+	input_report_abs(tj9->input_dev, ABS_Z, tj9->pdata.negate_z ? -z : z);
+	input_sync(tj9->input_dev);
+}
+
+static irqreturn_t kxtj9_isr(int irq, void *dev)
+{
+	struct kxtj9_data *tj9 = dev;
+	int err;
+
+	/* data ready is the only possible interrupt type */
+	kxtj9_report_acceleration_data(tj9);
+
+	err = i2c_smbus_read_byte_data(tj9->client, INT_REL);
+	if (err < 0)
+		dev_err(&tj9->client->dev,
+			"error clearing interrupt status: %d\n", err);
+
+	return IRQ_HANDLED;
+}
+
+static int kxtj9_update_g_range(struct kxtj9_data *tj9, u8 new_g_range)
+{
+	switch (new_g_range) {
+	case KXTJ9_G_2G:
+		tj9->shift = 4;
+		break;
+	case KXTJ9_G_4G:
+		tj9->shift = 3;
+		break;
+	case KXTJ9_G_8G:
+		tj9->shift = 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	tj9->ctrl_reg1 &= 0xe7;
+	tj9->ctrl_reg1 |= new_g_range;
+
+	return 0;
+}
+
+static int kxtj9_update_odr(struct kxtj9_data *tj9, unsigned int poll_interval)
+{
+	int err;
+	int i;
+
+	/* Use the lowest ODR that can support the requested poll interval */
+	for (i = 0; i < ARRAY_SIZE(kxtj9_odr_table); i++) {
+		tj9->data_ctrl = kxtj9_odr_table[i].mask;
+		if (poll_interval < kxtj9_odr_table[i].cutoff)
+			break;
+	}
+
+	err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0);
+	if (err < 0)
+		return err;
+
+	err = i2c_smbus_write_byte_data(tj9->client, DATA_CTRL, tj9->data_ctrl);
+	if (err < 0)
+		return err;
+
+	err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int kxtj9_device_power_on(struct kxtj9_data *tj9)
+{
+	if (tj9->pdata.power_on)
+		return tj9->pdata.power_on();
+
+	return 0;
+}
+
+static void kxtj9_device_power_off(struct kxtj9_data *tj9)
+{
+	int err;
+
+	tj9->ctrl_reg1 &= PC1_OFF;
+	err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+	if (err < 0)
+		dev_err(&tj9->client->dev, "soft power off failed\n");
+
+	if (tj9->pdata.power_off)
+		tj9->pdata.power_off();
+}
+
+static int kxtj9_enable(struct kxtj9_data *tj9)
+{
+	int err;
+
+	err = kxtj9_device_power_on(tj9);
+	if (err < 0)
+		return err;
+
+	/* ensure that PC1 is cleared before updating control registers */
+	err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0);
+	if (err < 0)
+		return err;
+
+	/* only write INT_CTRL_REG1 if in irq mode */
+	if (tj9->client->irq) {
+		err = i2c_smbus_write_byte_data(tj9->client,
+						INT_CTRL1, tj9->int_ctrl);
+		if (err < 0)
+			return err;
+	}
+
+	err = kxtj9_update_g_range(tj9, tj9->pdata.g_range);
+	if (err < 0)
+		return err;
+
+	/* turn on outputs */
+	tj9->ctrl_reg1 |= PC1_ON;
+	err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+	if (err < 0)
+		return err;
+
+	err = kxtj9_update_odr(tj9, tj9->last_poll_interval);
+	if (err < 0)
+		return err;
+
+	/* clear initial interrupt if in irq mode */
+	if (tj9->client->irq) {
+		err = i2c_smbus_read_byte_data(tj9->client, INT_REL);
+		if (err < 0) {
+			dev_err(&tj9->client->dev,
+				"error clearing interrupt: %d\n", err);
+			goto fail;
+		}
+	}
+
+	return 0;
+
+fail:
+	kxtj9_device_power_off(tj9);
+	return err;
+}
+
+static void kxtj9_disable(struct kxtj9_data *tj9)
+{
+	kxtj9_device_power_off(tj9);
+}
+
+static int kxtj9_input_open(struct input_dev *input)
+{
+	struct kxtj9_data *tj9 = input_get_drvdata(input);
+
+	return kxtj9_enable(tj9);
+}
+
+static void kxtj9_input_close(struct input_dev *dev)
+{
+	struct kxtj9_data *tj9 = input_get_drvdata(dev);
+
+	kxtj9_disable(tj9);
+}
+
+static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9,
+					      struct input_dev *input_dev)
+{
+	__set_bit(EV_ABS, input_dev->evbit);
+	input_set_abs_params(input_dev, ABS_X, -G_MAX, G_MAX, FUZZ, FLAT);
+	input_set_abs_params(input_dev, ABS_Y, -G_MAX, G_MAX, FUZZ, FLAT);
+	input_set_abs_params(input_dev, ABS_Z, -G_MAX, G_MAX, FUZZ, FLAT);
+
+	input_dev->name = "kxtj9_accel";
+	input_dev->id.bustype = BUS_I2C;
+	input_dev->dev.parent = &tj9->client->dev;
+}
+
+static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9)
+{
+	struct input_dev *input_dev;
+	int err;
+
+	input_dev = input_allocate_device();
+	if (!input_dev) {
+		dev_err(&tj9->client->dev, "input device allocate failed\n");
+		return -ENOMEM;
+	}
+
+	tj9->input_dev = input_dev;
+
+	input_dev->open = kxtj9_input_open;
+	input_dev->close = kxtj9_input_close;
+	input_set_drvdata(input_dev, tj9);
+
+	kxtj9_init_input_device(tj9, input_dev);
+
+	err = input_register_device(tj9->input_dev);
+	if (err) {
+		dev_err(&tj9->client->dev,
+			"unable to register input polled device %s: %d\n",
+			tj9->input_dev->name, err);
+		input_free_device(tj9->input_dev);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * When IRQ mode is selected, we need to provide an interface to allow the user
+ * to change the output data rate of the part.  For consistency, we are using
+ * the set_poll method, which accepts a poll interval in milliseconds, and then
+ * calls update_odr() while passing this value as an argument.  In IRQ mode,
+ * data is not read at the requested poll interval; instead, the part is set
+ * to the lowest ODR that can support that interval.  The client application
+ * will be responsible for retrieving data from the input node at the desired
+ * interval.
+ */
+
+/* Returns currently selected poll interval (in ms) */
+static ssize_t kxtj9_get_poll(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+
+	return sprintf(buf, "%d\n", tj9->last_poll_interval);
+}
+
+/* Allow users to select a new poll interval (in ms) */
+static ssize_t kxtj9_set_poll(struct device *dev, struct device_attribute *attr,
+						const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+	struct input_dev *input_dev = tj9->input_dev;
+	unsigned int interval;
+	int error;
+
+	error = kstrtouint(buf, 10, &interval);
+	if (error < 0)
+		return error;
+
+	/* Lock the device to prevent races with open/close (and itself) */
+	mutex_lock(&input_dev->mutex);
+
+	disable_irq(client->irq);
+
+	/*
+	 * Set current interval to the greater of the minimum interval or
+	 * the requested interval
+	 */
+	tj9->last_poll_interval = max(interval, tj9->pdata.min_interval);
+
+	kxtj9_update_odr(tj9, tj9->last_poll_interval);
+
+	enable_irq(client->irq);
+	mutex_unlock(&input_dev->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll);
+
+static struct attribute *kxtj9_attributes[] = {
+	&dev_attr_poll.attr,
+	NULL
+};
+
+static struct attribute_group kxtj9_attribute_group = {
+	.attrs = kxtj9_attributes
+};
+
+
+#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE
+static void kxtj9_poll(struct input_polled_dev *dev)
+{
+	struct kxtj9_data *tj9 = dev->private;
+	unsigned int poll_interval = dev->poll_interval;
+
+	kxtj9_report_acceleration_data(tj9);
+
+	if (poll_interval != tj9->last_poll_interval) {
+		kxtj9_update_odr(tj9, poll_interval);
+		tj9->last_poll_interval = poll_interval;
+	}
+}
+
+static void kxtj9_polled_input_open(struct input_polled_dev *dev)
+{
+	struct kxtj9_data *tj9 = dev->private;
+
+	kxtj9_enable(tj9);
+}
+
+static void kxtj9_polled_input_close(struct input_polled_dev *dev)
+{
+	struct kxtj9_data *tj9 = dev->private;
+
+	kxtj9_disable(tj9);
+}
+
+static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+{
+	int err;
+	struct input_polled_dev *poll_dev;
+	poll_dev = input_allocate_polled_device();
+
+	if (!poll_dev) {
+		dev_err(&tj9->client->dev,
+			"Failed to allocate polled device\n");
+		return -ENOMEM;
+	}
+
+	tj9->poll_dev = poll_dev;
+	tj9->input_dev = poll_dev->input;
+
+	poll_dev->private = tj9;
+	poll_dev->poll = kxtj9_poll;
+	poll_dev->open = kxtj9_polled_input_open;
+	poll_dev->close = kxtj9_polled_input_close;
+
+	kxtj9_init_input_device(tj9, poll_dev->input);
+
+	err = input_register_polled_device(poll_dev);
+	if (err) {
+		dev_err(&tj9->client->dev,
+			"Unable to register polled device, err=%d\n", err);
+		input_free_polled_device(poll_dev);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+{
+	input_unregister_polled_device(tj9->poll_dev);
+	input_free_polled_device(tj9->poll_dev);
+}
+
+#else
+
+static inline int kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+{
+	return -ENOSYS;
+}
+
+static inline void kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+{
+}
+
+#endif
+
+static int __devinit kxtj9_verify(struct kxtj9_data *tj9)
+{
+	int retval;
+
+	retval = kxtj9_device_power_on(tj9);
+	if (retval < 0)
+		return retval;
+
+	retval = i2c_smbus_read_byte_data(tj9->client, WHO_AM_I);
+	if (retval < 0) {
+		dev_err(&tj9->client->dev, "read err int source\n");
+		goto out;
+	}
+
+	retval = retval != 0x06 ? -EIO : 0;
+
+out:
+	kxtj9_device_power_off(tj9);
+	return retval;
+}
+
+static int __devinit kxtj9_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	const struct kxtj9_platform_data *pdata = client->dev.platform_data;
+	struct kxtj9_data *tj9;
+	int err;
+
+	if (!i2c_check_functionality(client->adapter,
+				I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev, "client is not i2c capable\n");
+		return -ENXIO;
+	}
+
+	if (!pdata) {
+		dev_err(&client->dev, "platform data is NULL; exiting\n");
+		return -EINVAL;
+	}
+
+	tj9 = kzalloc(sizeof(*tj9), GFP_KERNEL);
+	if (!tj9) {
+		dev_err(&client->dev,
+			"failed to allocate memory for module data\n");
+		return -ENOMEM;
+	}
+
+	tj9->client = client;
+	tj9->pdata = *pdata;
+
+	if (pdata->init) {
+		err = pdata->init();
+		if (err < 0)
+			goto err_free_mem;
+	}
+
+	err = kxtj9_verify(tj9);
+	if (err < 0) {
+		dev_err(&client->dev, "device not recognized\n");
+		goto err_pdata_exit;
+	}
+
+	i2c_set_clientdata(client, tj9);
+
+	tj9->ctrl_reg1 = tj9->pdata.res_12bit | tj9->pdata.g_range;
+	tj9->data_ctrl = tj9->pdata.data_odr_init;
+
+	if (client->irq) {
+		/* If in irq mode, populate INT_CTRL_REG1 and enable DRDY. */
+		tj9->int_ctrl |= KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL;
+		tj9->ctrl_reg1 |= DRDYE;
+
+		err = kxtj9_setup_input_device(tj9);
+		if (err)
+			goto err_pdata_exit;
+
+		err = request_threaded_irq(client->irq, NULL, kxtj9_isr,
+					   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+					   "kxtj9-irq", tj9);
+		if (err) {
+			dev_err(&client->dev, "request irq failed: %d\n", err);
+			goto err_destroy_input;
+		}
+
+		err = sysfs_create_group(&client->dev.kobj, &kxtj9_attribute_group);
+		if (err) {
+			dev_err(&client->dev, "sysfs create failed: %d\n", err);
+			goto err_free_irq;
+		}
+
+	} else {
+		err = kxtj9_setup_polled_device(tj9);
+		if (err)
+			goto err_pdata_exit;
+	}
+
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, tj9);
+err_destroy_input:
+	input_unregister_device(tj9->input_dev);
+err_pdata_exit:
+	if (tj9->pdata.exit)
+		tj9->pdata.exit();
+err_free_mem:
+	kfree(tj9);
+	return err;
+}
+
+static int __devexit kxtj9_remove(struct i2c_client *client)
+{
+	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+
+	if (client->irq) {
+		sysfs_remove_group(&client->dev.kobj, &kxtj9_attribute_group);
+		free_irq(client->irq, tj9);
+		input_unregister_device(tj9->input_dev);
+	} else {
+		kxtj9_teardown_polled_device(tj9);
+	}
+
+	if (tj9->pdata.exit)
+		tj9->pdata.exit();
+
+	kfree(tj9);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int kxtj9_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+	struct input_dev *input_dev = tj9->input_dev;
+
+	mutex_lock(&input_dev->mutex);
+
+	if (input_dev->users)
+		kxtj9_disable(tj9);
+
+	mutex_unlock(&input_dev->mutex);
+	return 0;
+}
+
+static int kxtj9_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+	struct input_dev *input_dev = tj9->input_dev;
+	int retval = 0;
+
+	mutex_lock(&input_dev->mutex);
+
+	if (input_dev->users)
+		kxtj9_enable(tj9);
+
+	mutex_unlock(&input_dev->mutex);
+	return retval;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume);
+
+static const struct i2c_device_id kxtj9_id[] = {
+	{ NAME, 0 },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(i2c, kxtj9_id);
+
+static struct i2c_driver kxtj9_driver = {
+	.driver = {
+		.name	= NAME,
+		.owner	= THIS_MODULE,
+		.pm	= &kxtj9_pm_ops,
+	},
+	.probe		= kxtj9_probe,
+	.remove		= __devexit_p(kxtj9_remove),
+	.id_table	= kxtj9_id,
+};
+
+static int __init kxtj9_init(void)
+{
+	return i2c_add_driver(&kxtj9_driver);
+}
+module_init(kxtj9_init);
+
+static void __exit kxtj9_exit(void)
+{
+	i2c_del_driver(&kxtj9_driver);
+}
+module_exit(kxtj9_exit);
+
+MODULE_DESCRIPTION("KXTJ9 accelerometer driver");
+MODULE_AUTHOR("Chris Hudson <chudson@kionix.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
new file mode 100644
index 0000000..20f8f92
--- /dev/null
+++ b/drivers/input/misc/mma8450.c
@@ -0,0 +1,256 @@
+/*
+ *  Driver for Freescale's 3-Axis Accelerometer MMA8450
+ *
+ *  Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input-polldev.h>
+
+#define MMA8450_DRV_NAME	"mma8450"
+
+#define MODE_CHANGE_DELAY_MS	100
+#define POLL_INTERVAL		100
+#define POLL_INTERVAL_MAX	500
+
+/* register definitions */
+#define MMA8450_STATUS		0x00
+#define MMA8450_STATUS_ZXYDR	0x08
+
+#define MMA8450_OUT_X8		0x01
+#define MMA8450_OUT_Y8		0x02
+#define MMA8450_OUT_Z8		0x03
+
+#define MMA8450_OUT_X_LSB	0x05
+#define MMA8450_OUT_X_MSB	0x06
+#define MMA8450_OUT_Y_LSB	0x07
+#define MMA8450_OUT_Y_MSB	0x08
+#define MMA8450_OUT_Z_LSB	0x09
+#define MMA8450_OUT_Z_MSB	0x0a
+
+#define MMA8450_XYZ_DATA_CFG	0x16
+
+#define MMA8450_CTRL_REG1	0x38
+#define MMA8450_CTRL_REG2	0x39
+
+/* mma8450 status */
+struct mma8450 {
+	struct i2c_client	*client;
+	struct input_polled_dev	*idev;
+};
+
+static int mma8450_read(struct mma8450 *m, unsigned off)
+{
+	struct i2c_client *c = m->client;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(c, off);
+	if (ret < 0)
+		dev_err(&c->dev,
+			"failed to read register 0x%02x, error %d\n",
+			off, ret);
+
+	return ret;
+}
+
+static int mma8450_write(struct mma8450 *m, unsigned off, u8 v)
+{
+	struct i2c_client *c = m->client;
+	int error;
+
+	error = i2c_smbus_write_byte_data(c, off, v);
+	if (error < 0) {
+		dev_err(&c->dev,
+			"failed to write to register 0x%02x, error %d\n",
+			off, error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int mma8450_read_xyz(struct mma8450 *m, int *x, int *y, int *z)
+{
+	struct i2c_client *c = m->client;
+	u8 buff[6];
+	int err;
+
+	err = i2c_smbus_read_i2c_block_data(c, MMA8450_OUT_X_LSB, 6, buff);
+	if (err < 0) {
+		dev_err(&c->dev,
+			"failed to read block data at 0x%02x, error %d\n",
+			MMA8450_OUT_X_LSB, err);
+		return err;
+	}
+
+	/* samples are 12-bit two's complement; sign-extend to a full int */
+	*x = sign_extend32(((buff[1] << 4) & 0xff0) | (buff[0] & 0xf), 11);
+	*y = sign_extend32(((buff[3] << 4) & 0xff0) | (buff[2] & 0xf), 11);
+	*z = sign_extend32(((buff[5] << 4) & 0xff0) | (buff[4] & 0xf), 11);
+
+	return 0;
+}
+
+static void mma8450_poll(struct input_polled_dev *dev)
+{
+	struct mma8450 *m = dev->private;
+	int x, y, z;
+	int ret;
+	int err;
+
+	ret = mma8450_read(m, MMA8450_STATUS);
+	if (ret < 0)
+		return;
+
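+	/* no new X/Y/Z sample ready yet; wait for the next poll */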
+	if (!(ret & MMA8450_STATUS_ZXYDR))
+		return;
+
+	err = mma8450_read_xyz(m, &x, &y, &z);
+	if (err)
+		return;
+
+	input_report_abs(dev->input, ABS_X, x);
+	input_report_abs(dev->input, ABS_Y, y);
+	input_report_abs(dev->input, ABS_Z, z);
+	input_sync(dev->input);
+}
+
+/* Initialize the MMA8450 chip */
+static void mma8450_open(struct input_polled_dev *dev)
+{
+	struct mma8450 *m = dev->private;
+	int err;
+
+	/* enable all events from X/Y/Z, no FIFO */
+	err = mma8450_write(m, MMA8450_XYZ_DATA_CFG, 0x07);
+	if (err)
+		return;
+
+	/*
+	 * Sleep mode poll rate - 50Hz
+	 * System output data rate - 400Hz
+	 * Full scale selection - Active, +/- 2G
+	 */
+	err = mma8450_write(m, MMA8450_CTRL_REG1, 0x01);
+	if (err)
+		return;
+
+	msleep(MODE_CHANGE_DELAY_MS);
+}
+
+static void mma8450_close(struct input_polled_dev *dev)
+{
+	struct mma8450 *m = dev->private;
+
+	mma8450_write(m, MMA8450_CTRL_REG1, 0x00);
+	mma8450_write(m, MMA8450_CTRL_REG2, 0x01);
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+static int __devinit mma8450_probe(struct i2c_client *c,
+				   const struct i2c_device_id *id)
+{
+	struct input_polled_dev *idev;
+	struct mma8450 *m;
+	int err;
+
+	m = kzalloc(sizeof(struct mma8450), GFP_KERNEL);
+	idev = input_allocate_polled_device();
+	if (!m || !idev) {
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	m->client = c;
+	m->idev = idev;
+	i2c_set_clientdata(c, m);
+
+	idev->private		= m;
+	idev->input->name	= MMA8450_DRV_NAME;
+	idev->input->id.bustype	= BUS_I2C;
+	idev->poll		= mma8450_poll;
+	idev->poll_interval	= POLL_INTERVAL;
+	idev->poll_interval_max	= POLL_INTERVAL_MAX;
+	idev->open		= mma8450_open;
+	idev->close		= mma8450_close;
+
+	__set_bit(EV_ABS, idev->input->evbit);
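+	/* 12-bit signed samples, so the full range is -2048..2047 */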
+	input_set_abs_params(idev->input, ABS_X, -2048, 2047, 32, 32);
+	input_set_abs_params(idev->input, ABS_Y, -2048, 2047, 32, 32);
+	input_set_abs_params(idev->input, ABS_Z, -2048, 2047, 32, 32);
+
+	err = input_register_polled_device(idev);
+	if (err) {
+		dev_err(&c->dev, "failed to register polled input device\n");
+		goto err_free_mem;
+	}
+
+	return 0;
+
+err_free_mem:
+	input_free_polled_device(idev);
+	kfree(m);
+	return err;
+}
+
+static int __devexit mma8450_remove(struct i2c_client *c)
+{
+	struct mma8450 *m = i2c_get_clientdata(c);
+	struct input_polled_dev *idev = m->idev;
+
+	input_unregister_polled_device(idev);
+	input_free_polled_device(idev);
+	kfree(m);
+
+	return 0;
+}
+
+static const struct i2c_device_id mma8450_id[] = {
+	{ MMA8450_DRV_NAME, 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, mma8450_id);
+
+static struct i2c_driver mma8450_driver = {
+	.driver = {
+		.name	= MMA8450_DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= mma8450_probe,
+	.remove		= __devexit_p(mma8450_remove),
+	.id_table	= mma8450_id,
+};
+
+static int __init mma8450_init(void)
+{
+	return i2c_add_driver(&mma8450_driver);
+}
+module_init(mma8450_init);
+
+static void __exit mma8450_exit(void)
+{
+	i2c_del_driver(&mma8450_driver);
+}
+module_exit(mma8450_exit);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MMA8450 3-Axis Accelerometer Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c
new file mode 100644
index 0000000..b95fac1
--- /dev/null
+++ b/drivers/input/misc/mpu3050.c
@@ -0,0 +1,376 @@
+/*
+ * MPU3050 Tri-axis gyroscope driver
+ *
+ * Copyright (C) 2011 Wistron Co.Ltd
+ * Joseph Lai <joseph_lai@wistron.com>
+ *
+ * Trimmed down by Alan Cox <alan@linux.intel.com> to produce this version
+ *
+ * This is a 'lite' version of the driver, while we consider the right way
+ * to present the other features to user space. In particular it requires
+ * that the device has an IRQ, and it only provides an input interface, so
+ * it is not much use for device orientation. A fuller version is available
+ * from the MeeGo tree.
+ *
+ * This program is based on bma023.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#define MPU3050_CHIP_ID_REG	0x00
+#define MPU3050_CHIP_ID		0x69
+#define MPU3050_XOUT_H		0x1D
+#define MPU3050_PWR_MGM		0x3E
+#define MPU3050_PWR_MGM_POS	6
+#define MPU3050_PWR_MGM_MASK	0x40
+
+#define MPU3050_AUTO_DELAY	1000
+
+#define MPU3050_MIN_VALUE	-32768
+#define MPU3050_MAX_VALUE	32767
+
+struct axis_data {
+	s16 x;
+	s16 y;
+	s16 z;
+};
+
+struct mpu3050_sensor {
+	struct i2c_client *client;
+	struct device *dev;
+	struct input_dev *idev;
+};
+
+/**
+ *	mpu3050_xyz_read_reg	-	read the axes values
+ *	@client: i2c client of the sensor
+ *	@buffer: buffer to receive the raw axis data
+ *	@length: number of bytes to read
+ *
+ *	Reads the register values in one transaction or returns a negative
+ *	error code on failure.
+ */
+static int mpu3050_xyz_read_reg(struct i2c_client *client,
+			       u8 *buffer, int length)
+{
+	/*
+	 * Annoying we can't make this const because the i2c layer doesn't
+	 * declare input buffers const.
+	 */
+	char cmd = MPU3050_XOUT_H;
+	struct i2c_msg msg[] = {
+		{
+			.addr = client->addr,
+			.flags = 0,
+			.len = 1,
+			.buf = &cmd,
+		},
+		{
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = length,
+			.buf = buffer,
+		},
+	};
+
+	return i2c_transfer(client->adapter, msg, 2);
+}
+
+/**
+ *	mpu3050_read_xyz	-	get co-ordinates from device
+ *	@client: i2c client of the sensor
+ *	@coords: co-ordinates to update
+ *
+ *	Read the X, Y and Z co-ordinates from the sensor and store the
+ *	converted values in @coords.
+ */
+static void mpu3050_read_xyz(struct i2c_client *client,
+			     struct axis_data *coords)
+{
+	u16 buffer[3];
+
+	mpu3050_xyz_read_reg(client, (u8 *)buffer, 6);
+	coords->x = be16_to_cpu(buffer[0]);
+	coords->y = be16_to_cpu(buffer[1]);
+	coords->z = be16_to_cpu(buffer[2]);
+	dev_dbg(&client->dev, "%s: x %d, y %d, z %d\n", __func__,
+					coords->x, coords->y, coords->z);
+}
+
+/**
+ *	mpu3050_set_power_mode	-	set the power mode
+ *	@client: i2c client for the sensor
+ *	@val: power state to set: 1 for normal power, 0 for low power
+ *
+ *	Put the device into normal-power or low-power mode.
+ */
+static void mpu3050_set_power_mode(struct i2c_client *client, u8 val)
+{
+	u8 value;
+
+	value = i2c_smbus_read_byte_data(client, MPU3050_PWR_MGM);
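+	/*
+	 * The XOR below inverts the value written to the PWR_MGM bit, so
+	 * val == 1 clears the bit (normal power) and val == 0 sets it
+	 * (low power/sleep).
+	 */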
+	value = (value & ~MPU3050_PWR_MGM_MASK) |
+		(((val << MPU3050_PWR_MGM_POS) & MPU3050_PWR_MGM_MASK) ^
+		 MPU3050_PWR_MGM_MASK);
+	i2c_smbus_write_byte_data(client, MPU3050_PWR_MGM, value);
+}
+
+/**
+ *	mpu3050_input_open	-	called on input event open
+ *	@input: input dev of opened device
+ *
+ *	The input layer calls this function when the input device is opened.
+ *	The function asks runtime PM to resume the device so that it is ready
+ *	to provide data.
+ */
+static int mpu3050_input_open(struct input_dev *input)
+{
+	struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+	pm_runtime_get(sensor->dev);
+
+	return 0;
+}
+
+/**
+ *	mpu3050_input_close	-	called on input event close
+ *	@input: input dev of closed device
+ *
+ *	The input layer calls this function when the input device is closed.
+ *	The function drops the runtime PM reference, allowing the device to
+ *	suspend.
+ */
+static void mpu3050_input_close(struct input_dev *input)
+{
+	struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+	pm_runtime_put(sensor->dev);
+}
+
+/**
+ *	mpu3050_interrupt_thread	-	handle an IRQ
+ *	@irq: interrupt number
+ *	@data: the sensor
+ *
+ *	Called by the kernel as a threaded interrupt handler after an
+ *	interrupt occurs. Read the sensor data and generate an input
+ *	event for it.
+ */
+static irqreturn_t mpu3050_interrupt_thread(int irq, void *data)
+{
+	struct mpu3050_sensor *sensor = data;
+	struct axis_data axis;
+
+	mpu3050_read_xyz(sensor->client, &axis);
+
+	input_report_abs(sensor->idev, ABS_X, axis.x);
+	input_report_abs(sensor->idev, ABS_Y, axis.y);
+	input_report_abs(sensor->idev, ABS_Z, axis.z);
+	input_sync(sensor->idev);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ *	mpu3050_probe	-	device detection callback
+ *	@client: i2c client of found device
+ *	@id: id match information
+ *
+ *	The I2C layer calls us when it believes a sensor is present at this
+ *	address. Probe to see if this is correct and to validate the device.
+ *
+ *	If present install the relevant sysfs interfaces and input device.
+ */
+static int __devinit mpu3050_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct mpu3050_sensor *sensor;
+	struct input_dev *idev;
+	int ret;
+	int error;
+
+	sensor = kzalloc(sizeof(struct mpu3050_sensor), GFP_KERNEL);
+	idev = input_allocate_device();
+	if (!sensor || !idev) {
+		dev_err(&client->dev, "failed to allocate driver data\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	sensor->client = client;
+	sensor->dev = &client->dev;
+	sensor->idev = idev;
+	i2c_set_clientdata(client, sensor);
+
+	mpu3050_set_power_mode(client, 1);
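+	/* give the device time to come out of low power before probing it */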
+	msleep(10);
+
+	ret = i2c_smbus_read_byte_data(client, MPU3050_CHIP_ID_REG);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed to detect device\n");
+		error = -ENXIO;
+		goto err_free_mem;
+	}
+
+	if (ret != MPU3050_CHIP_ID) {
+		dev_err(&client->dev, "unsupported chip id\n");
+		error = -ENXIO;
+		goto err_free_mem;
+	}
+
+	idev->name = "MPU3050";
+	idev->id.bustype = BUS_I2C;
+	idev->dev.parent = &client->dev;
+
+	idev->open = mpu3050_input_open;
+	idev->close = mpu3050_input_close;
+
+	__set_bit(EV_ABS, idev->evbit);
+	input_set_abs_params(idev, ABS_X,
+			     MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+	input_set_abs_params(idev, ABS_Y,
+			     MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+	input_set_abs_params(idev, ABS_Z,
+			     MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+
+	input_set_drvdata(idev, sensor);
+
+	pm_runtime_set_active(&client->dev);
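+	/* the device was powered up above, so runtime PM starts out active */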
+
+	error = request_threaded_irq(client->irq,
+				     NULL, mpu3050_interrupt_thread,
+				     IRQF_TRIGGER_RISING,
+				     "mpu_int", sensor);
+	if (error) {
+		dev_err(&client->dev,
+			"can't get IRQ %d, error %d\n", client->irq, error);
+		goto err_pm_set_suspended;
+	}
+
+	error = input_register_device(idev);
+	if (error) {
+		dev_err(&client->dev, "failed to register input device\n");
+		goto err_free_irq;
+	}
+
+	pm_runtime_enable(&client->dev);
+	pm_runtime_set_autosuspend_delay(&client->dev, MPU3050_AUTO_DELAY);
+
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, sensor);
+err_pm_set_suspended:
+	pm_runtime_set_suspended(&client->dev);
+err_free_mem:
+	input_free_device(idev);
+	kfree(sensor);
+	return error;
+}
+
+/**
+ *	mpu3050_remove	-	remove a sensor
+ *	@client: i2c client of sensor being removed
+ *
+ *	Our sensor is going away, clean up the resources.
+ */
+static int __devexit mpu3050_remove(struct i2c_client *client)
+{
+	struct mpu3050_sensor *sensor = i2c_get_clientdata(client);
+
+	pm_runtime_disable(&client->dev);
+	pm_runtime_set_suspended(&client->dev);
+
+	free_irq(client->irq, sensor);
+	input_unregister_device(sensor->idev);
+	kfree(sensor);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/**
+ *	mpu3050_suspend		-	called on device suspend
+ *	@dev: device being suspended
+ *
+ *	Put the device into sleep mode before we suspend the machine.
+ */
+static int mpu3050_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	mpu3050_set_power_mode(client, 0);
+
+	return 0;
+}
+
+/**
+ *	mpu3050_resume		-	called on device resume
+ *	@dev: device being resumed
+ *
+ *	Put the device into powered mode on resume.
+ */
+static int mpu3050_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	mpu3050_set_power_mode(client, 1);
+	msleep(100);  /* wait for gyro chip resume */
+
+	return 0;
+}
+#endif
+
+static UNIVERSAL_DEV_PM_OPS(mpu3050_pm, mpu3050_suspend, mpu3050_resume, NULL);
+
+static const struct i2c_device_id mpu3050_ids[] = {
+	{ "mpu3050", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, mpu3050_ids);
+
+static struct i2c_driver mpu3050_i2c_driver = {
+	.driver	= {
+		.name	= "mpu3050",
+		.owner	= THIS_MODULE,
+		.pm	= &mpu3050_pm,
+	},
+	.probe		= mpu3050_probe,
+	.remove		= __devexit_p(mpu3050_remove),
+	.id_table	= mpu3050_ids,
+};
+
+static int __init mpu3050_init(void)
+{
+	return i2c_add_driver(&mpu3050_i2c_driver);
+}
+module_init(mpu3050_init);
+
+static void __exit mpu3050_exit(void)
+{
+	i2c_del_driver(&mpu3050_i2c_driver);
+}
+module_exit(mpu3050_exit);
+
+MODULE_AUTHOR("Wistron Corp.");
+MODULE_DESCRIPTION("MPU3050 Tri-axis gyroscope driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 62bae99..ad2e51c 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -373,7 +373,7 @@
 
 static int __init xenkbd_init(void)
 {
-	if (!xen_pv_domain())
+	if (!xen_domain())
 		return -ENODEV;
 
 	/* Nothing to do if running in dom0. */
diff --git a/drivers/input/mouse/gpio_mouse.c b/drivers/input/mouse/gpio_mouse.c
index 7b6ce17..58902fb 100644
--- a/drivers/input/mouse/gpio_mouse.c
+++ b/drivers/input/mouse/gpio_mouse.c
@@ -191,7 +191,7 @@
 }
 module_exit(gpio_mouse_exit);
 
-MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("GPIO mouse driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:gpio_mouse"); /* work with hotplug and coldplug */
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index c31ad11..83bcaba 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -33,7 +33,7 @@
 static int lifebook_limit_serio3(const struct dmi_system_id *d)
 {
 	desired_serio_phys = "isa0060/serio3";
-	return 0;
+	return 1;
 }
 
 static bool lifebook_use_6byte_proto;
@@ -41,7 +41,7 @@
 static int lifebook_set_6byte_proto(const struct dmi_system_id *d)
 {
 	lifebook_use_6byte_proto = true;
-	return 0;
+	return 1;
 }
 
 static const struct dmi_system_id __initconst lifebook_dmi_table[] = {
diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c
index 943cfec..6c5d84f 100644
--- a/drivers/input/mouse/pxa930_trkball.c
+++ b/drivers/input/mouse/pxa930_trkball.c
@@ -12,7 +12,6 @@
 
 #include <linux/init.h>
 #include <linux/input.h>
-#include <linux/version.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c
index 1242775..2fc887a 100644
--- a/drivers/input/mouse/sentelic.c
+++ b/drivers/input/mouse/sentelic.c
@@ -20,7 +20,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/input.h>
 #include <linux/ctype.h>
 #include <linux/libps2.h>
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index e06e045..5538fc6 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -207,27 +207,37 @@
 static int synaptics_resolution(struct psmouse *psmouse)
 {
 	struct synaptics_data *priv = psmouse->private;
-	unsigned char res[3];
-	unsigned char max[3];
+	unsigned char resp[3];
 
 	if (SYN_ID_MAJOR(priv->identity) < 4)
 		return 0;
 
-	if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, res) == 0) {
-		if (res[0] != 0 && (res[1] & 0x80) && res[2] != 0) {
-			priv->x_res = res[0]; /* x resolution in units/mm */
-			priv->y_res = res[2]; /* y resolution in units/mm */
+	if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, resp) == 0) {
+		if (resp[0] != 0 && (resp[1] & 0x80) && resp[2] != 0) {
+			priv->x_res = resp[0]; /* x resolution in units/mm */
+			priv->y_res = resp[2]; /* y resolution in units/mm */
 		}
 	}
 
 	if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 &&
 	    SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) {
-		if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_DIMENSIONS, max)) {
-			printk(KERN_ERR "Synaptics claims to have dimensions query,"
-			       " but I'm not able to read it.\n");
+		if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) {
+			printk(KERN_ERR "Synaptics claims to have max coordinates"
+			       " query, but I'm not able to read it.\n");
 		} else {
-			priv->x_max = (max[0] << 5) | ((max[1] & 0x0f) << 1);
-			priv->y_max = (max[2] << 5) | ((max[1] & 0xf0) >> 3);
+			priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
+			priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
+		}
+	}
+
+	if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 &&
+	    SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) {
+		if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) {
+			printk(KERN_ERR "Synaptics claims to have min coordinates"
+			       " query, but I'm not able to read it.\n");
+		} else {
+			priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
+			priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
 		}
 	}
 
@@ -406,26 +416,10 @@
 	memset(hw, 0, sizeof(struct synaptics_hw_state));
 
 	if (SYN_MODEL_NEWABS(priv->model_id)) {
-		hw->x = (((buf[3] & 0x10) << 8) |
-			 ((buf[1] & 0x0f) << 8) |
-			 buf[4]);
-		hw->y = (((buf[3] & 0x20) << 7) |
-			 ((buf[1] & 0xf0) << 4) |
-			 buf[5]);
-
-		hw->z = buf[2];
 		hw->w = (((buf[0] & 0x30) >> 2) |
 			 ((buf[0] & 0x04) >> 1) |
 			 ((buf[3] & 0x04) >> 2));
 
-		if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) {
-			/* Gesture packet: (x, y, z) at half resolution */
-			priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1;
-			priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1;
-			priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1;
-			return 1;
-		}
-
 		hw->left  = (buf[0] & 0x01) ? 1 : 0;
 		hw->right = (buf[0] & 0x02) ? 1 : 0;
 
@@ -448,6 +442,22 @@
 			hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0;
 		}
 
+		if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) {
+			/* Gesture packet: (x, y, z) at half resolution */
+			priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1;
+			priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1;
+			priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1;
+			return 1;
+		}
+
+		hw->x = (((buf[3] & 0x10) << 8) |
+			 ((buf[1] & 0x0f) << 8) |
+			 buf[4]);
+		hw->y = (((buf[3] & 0x20) << 7) |
+			 ((buf[1] & 0xf0) << 4) |
+			 buf[5]);
+		hw->z = buf[2];
+
 		if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) &&
 		    ((buf[0] ^ buf[3]) & 0x02)) {
 			switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) {
@@ -485,7 +495,8 @@
 	return 0;
 }
 
-static void set_slot(struct input_dev *dev, int slot, bool active, int x, int y)
+static void synaptics_report_semi_mt_slot(struct input_dev *dev, int slot,
+					  bool active, int x, int y)
 {
 	input_mt_slot(dev, slot);
 	input_mt_report_slot_state(dev, MT_TOOL_FINGER, active);
@@ -502,14 +513,16 @@
 					  int num_fingers)
 {
 	if (num_fingers >= 2) {
-		set_slot(dev, 0, true, min(a->x, b->x), min(a->y, b->y));
-		set_slot(dev, 1, true, max(a->x, b->x), max(a->y, b->y));
+		synaptics_report_semi_mt_slot(dev, 0, true, min(a->x, b->x),
+					      min(a->y, b->y));
+		synaptics_report_semi_mt_slot(dev, 1, true, max(a->x, b->x),
+					      max(a->y, b->y));
 	} else if (num_fingers == 1) {
-		set_slot(dev, 0, true, a->x, a->y);
-		set_slot(dev, 1, false, 0, 0);
+		synaptics_report_semi_mt_slot(dev, 0, true, a->x, a->y);
+		synaptics_report_semi_mt_slot(dev, 1, false, 0, 0);
 	} else {
-		set_slot(dev, 0, false, 0, 0);
-		set_slot(dev, 1, false, 0, 0);
+		synaptics_report_semi_mt_slot(dev, 0, false, 0, 0);
+		synaptics_report_semi_mt_slot(dev, 1, false, 0, 0);
 	}
 }
 
@@ -684,23 +697,36 @@
 static void set_input_params(struct input_dev *dev, struct synaptics_data *priv)
 {
 	int i;
+	int fuzz = SYN_CAP_REDUCED_FILTERING(priv->ext_cap_0c) ?
+			SYN_REDUCED_FILTER_FUZZ : 0;
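+	/*
+	 * Firmware that does reduced filtering leaves more noise in the
+	 * position data, so let the input core fuzz it instead.
+	 */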
 
 	__set_bit(INPUT_PROP_POINTER, dev->propbit);
 
 	__set_bit(EV_ABS, dev->evbit);
 	input_set_abs_params(dev, ABS_X,
-			     XMIN_NOMINAL, priv->x_max ?: XMAX_NOMINAL, 0, 0);
+			     priv->x_min ?: XMIN_NOMINAL,
+			     priv->x_max ?: XMAX_NOMINAL,
+			     fuzz, 0);
 	input_set_abs_params(dev, ABS_Y,
-			     YMIN_NOMINAL, priv->y_max ?: YMAX_NOMINAL, 0, 0);
+			     priv->y_min ?: YMIN_NOMINAL,
+			     priv->y_max ?: YMAX_NOMINAL,
+			     fuzz, 0);
 	input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0);
 
 	if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c)) {
 		__set_bit(INPUT_PROP_SEMI_MT, dev->propbit);
 		input_mt_init_slots(dev, 2);
-		input_set_abs_params(dev, ABS_MT_POSITION_X, XMIN_NOMINAL,
-				     priv->x_max ?: XMAX_NOMINAL, 0, 0);
-		input_set_abs_params(dev, ABS_MT_POSITION_Y, YMIN_NOMINAL,
-				     priv->y_max ?: YMAX_NOMINAL, 0, 0);
+		input_set_abs_params(dev, ABS_MT_POSITION_X,
+				     priv->x_min ?: XMIN_NOMINAL,
+				     priv->x_max ?: XMAX_NOMINAL,
+				     fuzz, 0);
+		input_set_abs_params(dev, ABS_MT_POSITION_Y,
+				     priv->y_min ?: YMIN_NOMINAL,
+				     priv->y_max ?: YMAX_NOMINAL,
+				     fuzz, 0);
+
+		input_abs_set_res(dev, ABS_MT_POSITION_X, priv->x_res);
+		input_abs_set_res(dev, ABS_MT_POSITION_Y, priv->y_res);
 	}
 
 	if (SYN_CAP_PALMDETECT(priv->capabilities))
@@ -971,4 +997,3 @@
 }
 
 #endif /* CONFIG_MOUSE_PS2_SYNAPTICS */
-
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index 7453938..ca040aa 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -19,7 +19,8 @@
 #define SYN_QUE_RESOLUTION		0x08
 #define SYN_QUE_EXT_CAPAB		0x09
 #define SYN_QUE_EXT_CAPAB_0C		0x0c
-#define SYN_QUE_EXT_DIMENSIONS		0x0d
+#define SYN_QUE_EXT_MAX_COORDS		0x0d
+#define SYN_QUE_EXT_MIN_COORDS		0x0f
 
 /* synatics modes */
 #define SYN_BIT_ABSOLUTE_MODE		(1 << 7)
@@ -66,18 +67,21 @@
  * 1	0x60	multifinger mode	identifies firmware finger counting
  *					(not reporting!) algorithm.
  *					Not particularly meaningful
- * 1	0x80    covered pad		W clipped to 14, 15 == pad mostly covered
- * 2	0x01    clickpad bit 1		2-button ClickPad
- * 2	0x02    deluxe LED controls	touchpad support LED commands
+ * 1	0x80	covered pad		W clipped to 14, 15 == pad mostly covered
+ * 2	0x01	clickpad bit 1		2-button ClickPad
+ * 2	0x02	deluxe LED controls	touchpad support LED commands
  *					ala multimedia control bar
  * 2	0x04	reduced filtering	firmware does less filtering on
  *					position data, driver should watch
  *					for noise.
+ * 2	0x20	report min		query 0x0f gives min coord reported
  */
 #define SYN_CAP_CLICKPAD(ex0c)		((ex0c) & 0x100000) /* 1-button ClickPad */
 #define SYN_CAP_CLICKPAD2BTN(ex0c)	((ex0c) & 0x000100) /* 2-button ClickPad */
 #define SYN_CAP_MAX_DIMENSIONS(ex0c)	((ex0c) & 0x020000)
+#define SYN_CAP_MIN_DIMENSIONS(ex0c)	((ex0c) & 0x002000)
 #define SYN_CAP_ADV_GESTURE(ex0c)	((ex0c) & 0x080000)
+#define SYN_CAP_REDUCED_FILTERING(ex0c)	((ex0c) & 0x000400)
 
 /* synaptics modes query bits */
 #define SYN_MODE_ABSOLUTE(m)		((m) & (1 << 7))
@@ -104,6 +108,9 @@
 #define SYN_NEWABS_RELAXED		2
 #define SYN_OLDABS			3
 
+/* amount to fuzz position data when touchpad reports reduced filtering */
+#define SYN_REDUCED_FILTER_FUZZ		8
+
 /*
  * A structure to describe the state of the touchpad hardware (buttons and pad)
  */
@@ -130,7 +137,8 @@
 	unsigned long int ext_cap_0c;		/* Ext Caps from 0x0c query */
 	unsigned long int identity;		/* Identification */
 	unsigned int x_res, y_res;		/* X/Y resolution in units/mm */
-	unsigned int x_max, y_max;		/* Max dimensions (from FW) */
+	unsigned int x_max, y_max;		/* Max coordinates (from FW) */
+	unsigned int x_min, y_min;		/* Min coordinates (from FW) */
 
 	unsigned char pkt_type;			/* packet type - old, new, etc */
 	unsigned char mode;			/* current mode byte */
diff --git a/drivers/input/serio/at32psif.c b/drivers/input/serio/at32psif.c
index 6ee8f0d..95280f9 100644
--- a/drivers/input/serio/at32psif.c
+++ b/drivers/input/serio/at32psif.c
@@ -372,6 +372,6 @@
 module_init(psif_init);
 module_exit(psif_exit);
 
-MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("Atmel AVR32 PSIF PS/2 driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 4220620..979c443 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -795,7 +795,7 @@
 
 /************************* Keepalive timer task *********************/
 
-void hp_sdc_kicker (unsigned long data)
+static void hp_sdc_kicker(unsigned long data)
 {
 	tasklet_schedule(&hp_sdc.task);
 	/* Re-insert the periodic task. */
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index 0a619c5..6d89fd1 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -225,7 +225,6 @@
 	/* toolMode codes
 	 */
 #define AIPTEK_TOOL_BUTTON_PEN_MODE			BTN_TOOL_PEN
-#define AIPTEK_TOOL_BUTTON_PEN_MODE			BTN_TOOL_PEN
 #define AIPTEK_TOOL_BUTTON_PENCIL_MODE			BTN_TOOL_PENCIL
 #define AIPTEK_TOOL_BUTTON_BRUSH_MODE			BTN_TOOL_BRUSH
 #define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE		BTN_TOOL_AIRBRUSH
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 08ba5ad..03ebcc8 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -15,6 +15,7 @@
 #include "wacom_wac.h"
 #include "wacom.h"
 #include <linux/input/mt.h>
+#include <linux/hid.h>
 
 /* resolution for penabled devices */
 #define WACOM_PL_RES		20
@@ -264,6 +265,7 @@
 			wacom->id[0] = 0;
 		input_report_abs(input, ABS_MISC, wacom->id[0]); /* report tool id */
 		input_report_key(input, wacom->tool[0], prox);
+		input_event(input, EV_MSC, MSC_SERIAL, 1);
 		input_sync(input); /* sync last event */
 	}
 
@@ -273,11 +275,10 @@
 		prox = data[7] & 0xf8;
 		if (prox || wacom->id[1]) {
 			wacom->id[1] = PAD_DEVICE_ID;
-			input_report_key(input, BTN_0, (data[7] & 0x40));
-			input_report_key(input, BTN_4, (data[7] & 0x80));
+			input_report_key(input, BTN_BACK, (data[7] & 0x40));
+			input_report_key(input, BTN_FORWARD, (data[7] & 0x80));
 			rw = ((data[7] & 0x18) >> 3) - ((data[7] & 0x20) >> 3);
 			input_report_rel(input, REL_WHEEL, rw);
-			input_report_key(input, BTN_TOOL_FINGER, 0xf0);
 			if (!prox)
 				wacom->id[1] = 0;
 			input_report_abs(input, ABS_MISC, wacom->id[1]);
@@ -290,18 +291,17 @@
 		prox = (data[7] & 0xf8) || data[8];
 		if (prox || wacom->id[1]) {
 			wacom->id[1] = PAD_DEVICE_ID;
-			input_report_key(input, BTN_0, (data[7] & 0x08));
-			input_report_key(input, BTN_1, (data[7] & 0x20));
-			input_report_key(input, BTN_4, (data[7] & 0x10));
-			input_report_key(input, BTN_5, (data[7] & 0x40));
+			input_report_key(input, BTN_BACK, (data[7] & 0x08));
+			input_report_key(input, BTN_LEFT, (data[7] & 0x20));
+			input_report_key(input, BTN_FORWARD, (data[7] & 0x10));
+			input_report_key(input, BTN_RIGHT, (data[7] & 0x40));
 			input_report_abs(input, ABS_WHEEL, (data[8] & 0x7f));
-			input_report_key(input, BTN_TOOL_FINGER, 0xf0);
 			if (!prox)
 				wacom->id[1] = 0;
 			input_report_abs(input, ABS_MISC, wacom->id[1]);
 			input_event(input, EV_MSC, MSC_SERIAL, 0xf0);
+			retval = 1;
 		}
-		retval = 1;
 		break;
 	}
 exit:
@@ -494,10 +494,6 @@
 
 	/* pad packets. Works as a second tool and is always in prox */
 	if (data[0] == WACOM_REPORT_INTUOSPAD) {
-		/* initiate the pad as a device */
-		if (wacom->tool[1] != BTN_TOOL_FINGER)
-			wacom->tool[1] = BTN_TOOL_FINGER;
-
 		if (features->type >= INTUOS4S && features->type <= INTUOS4L) {
 			input_report_key(input, BTN_0, (data[2] & 0x01));
 			input_report_key(input, BTN_1, (data[3] & 0x01));
@@ -1080,18 +1076,14 @@
 
 	switch (wacom_wac->features.type) {
 	case WACOM_MO:
-		__set_bit(BTN_1, input_dev->keybit);
-		__set_bit(BTN_5, input_dev->keybit);
-
 		input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
 		/* fall through */
 
 	case WACOM_G4:
 		input_set_capability(input_dev, EV_MSC, MSC_SERIAL);
 
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-		__set_bit(BTN_0, input_dev->keybit);
-		__set_bit(BTN_4, input_dev->keybit);
+		__set_bit(BTN_BACK, input_dev->keybit);
+		__set_bit(BTN_FORWARD, input_dev->keybit);
 		/* fall through */
 
 	case GRAPHIRE:
@@ -1127,10 +1119,12 @@
 	case CINTIQ:
 		for (i = 0; i < 8; i++)
 			__set_bit(BTN_0 + i, input_dev->keybit);
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
 
-		input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
-		input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
+		if (wacom_wac->features.type != WACOM_21UX2) {
+			input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
+			input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
+		}
+
 		input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
 		wacom_setup_cintiq(wacom_wac);
 		break;
@@ -1151,8 +1145,6 @@
 		__set_bit(BTN_2, input_dev->keybit);
 		__set_bit(BTN_3, input_dev->keybit);
 
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-
 		input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
 		input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
 		/* fall through */
@@ -1170,7 +1162,6 @@
 	case INTUOS4S:
 		for (i = 0; i < 7; i++)
 			__set_bit(BTN_0 + i, input_dev->keybit);
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
 
 		input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
 		wacom_setup_intuos(wacom_wac);
@@ -1295,6 +1286,12 @@
 static const struct wacom_features wacom_features_0x69 =
 	{ "Wacom Bamboo1",        WACOM_PKGLEN_GRAPHIRE,   5104,  3712,  511,
 	  63, GRAPHIRE, WACOM_PENPRTN_RES, WACOM_PENPRTN_RES };
+static const struct wacom_features wacom_features_0x6A =
+	{ "Wacom Bamboo1 4x6",    WACOM_PKGLEN_GRAPHIRE,  14760,  9225, 1023,
+	  63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x6B =
+	{ "Wacom Bamboo1 5x8",    WACOM_PKGLEN_GRAPHIRE,  21648, 13530, 1023,
+	  63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x20 =
 	{ "Wacom Intuos 4x5",     WACOM_PKGLEN_INTUOS,    12700, 10600, 1023,
 	  31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1427,6 +1424,9 @@
 static const struct wacom_features wacom_features_0x93 =
 	{ "Wacom ISDv4 93",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
 	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x97 =
+	{ "Wacom ISDv4 97",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  511,
+	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x9A =
 	{ "Wacom ISDv4 9A",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
 	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1458,7 +1458,7 @@
 	{ "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN,     21648, 13530, 1023,
 	  63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD4 =
-	{ "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200,  255,
+	{ "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
 	  63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD6 =
 	{ "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN,   14720,  9200, 1023,
@@ -1483,6 +1483,11 @@
 	USB_DEVICE(USB_VENDOR_ID_WACOM, prod),			\
 	.driver_info = (kernel_ulong_t)&wacom_features_##prod
 
+#define USB_DEVICE_DETAILED(prod, class, sub, proto)			\
+	USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class,	\
+				      sub, proto),			\
+	.driver_info = (kernel_ulong_t)&wacom_features_##prod
+
 #define USB_DEVICE_LENOVO(prod)					\
 	USB_DEVICE(USB_VENDOR_ID_LENOVO, prod),			\
 	.driver_info = (kernel_ulong_t)&wacom_features_##prod
@@ -1506,6 +1511,8 @@
 	{ USB_DEVICE_WACOM(0x64) },
 	{ USB_DEVICE_WACOM(0x65) },
 	{ USB_DEVICE_WACOM(0x69) },
+	{ USB_DEVICE_WACOM(0x6A) },
+	{ USB_DEVICE_WACOM(0x6B) },
 	{ USB_DEVICE_WACOM(0x20) },
 	{ USB_DEVICE_WACOM(0x21) },
 	{ USB_DEVICE_WACOM(0x22) },
@@ -1545,7 +1552,13 @@
 	{ USB_DEVICE_WACOM(0xC5) },
 	{ USB_DEVICE_WACOM(0xC6) },
 	{ USB_DEVICE_WACOM(0xC7) },
-	{ USB_DEVICE_WACOM(0xCE) },
+	/*
+	 * DTU-2231 has two interfaces on the same configuration,
+	 * only one is used.
+	 */
+	{ USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID,
+			      USB_INTERFACE_SUBCLASS_BOOT,
+			      USB_INTERFACE_PROTOCOL_MOUSE) },
 	{ USB_DEVICE_WACOM(0xD0) },
 	{ USB_DEVICE_WACOM(0xD1) },
 	{ USB_DEVICE_WACOM(0xD2) },
@@ -1560,6 +1573,7 @@
 	{ USB_DEVICE_WACOM(0xCC) },
 	{ USB_DEVICE_WACOM(0x90) },
 	{ USB_DEVICE_WACOM(0x93) },
+	{ USB_DEVICE_WACOM(0x97) },
 	{ USB_DEVICE_WACOM(0x9A) },
 	{ USB_DEVICE_WACOM(0x9F) },
 	{ USB_DEVICE_WACOM(0xE2) },
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 5196861..d507b9b 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -967,17 +967,12 @@
 		ts->get_pendown_state = pdata->get_pendown_state;
 	} else if (gpio_is_valid(pdata->gpio_pendown)) {
 
-		err = gpio_request(pdata->gpio_pendown, "ads7846_pendown");
+		err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN,
+				       "ads7846_pendown");
 		if (err) {
-			dev_err(&spi->dev, "failed to request pendown GPIO%d\n",
-				pdata->gpio_pendown);
-			return err;
-		}
-		err = gpio_direction_input(pdata->gpio_pendown);
-		if (err) {
-			dev_err(&spi->dev, "failed to setup pendown GPIO%d\n",
-				pdata->gpio_pendown);
-			gpio_free(pdata->gpio_pendown);
+			dev_err(&spi->dev,
+				"failed to request/setup pendown GPIO%d: %d\n",
+				pdata->gpio_pendown, err);
 			return err;
 		}
 
diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c
index fa8e56b..8034cbb 100644
--- a/drivers/input/touchscreen/atmel-wm97xx.c
+++ b/drivers/input/touchscreen/atmel-wm97xx.c
@@ -164,7 +164,7 @@
 
 		data = ac97c_readl(atmel_wm97xx, CBRHR);
 		value = data & 0x0fff;
-		source = data & WM97XX_ADCSRC_MASK;
+		source = data & WM97XX_ADCSEL_MASK;
 		pen_down = (data & WM97XX_PEN_DOWN) >> 8;
 
 		if (source == WM97XX_ADCSEL_X)
@@ -442,6 +442,6 @@
 }
 module_exit(atmel_wm97xx_exit);
 
-MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("wm97xx continuous touch driver for Atmel AT91 and AVR32");
 MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 1e61387..ae00604 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -48,41 +48,47 @@
 #define MXT_OBJECT_SIZE		6
 
 /* Object types */
-#define MXT_DEBUG_DIAGNOSTIC	37
-#define MXT_GEN_MESSAGE		5
-#define MXT_GEN_COMMAND		6
-#define MXT_GEN_POWER		7
-#define MXT_GEN_ACQUIRE		8
-#define MXT_TOUCH_MULTI		9
-#define MXT_TOUCH_KEYARRAY	15
-#define MXT_TOUCH_PROXIMITY	23
-#define MXT_PROCI_GRIPFACE	20
-#define MXT_PROCG_NOISE		22
-#define MXT_PROCI_ONETOUCH	24
-#define MXT_PROCI_TWOTOUCH	27
-#define MXT_PROCI_GRIP		40
-#define MXT_PROCI_PALM		41
-#define MXT_SPT_COMMSCONFIG	18
-#define MXT_SPT_GPIOPWM		19
-#define MXT_SPT_SELFTEST	25
-#define MXT_SPT_CTECONFIG	28
-#define MXT_SPT_USERDATA	38
-#define MXT_SPT_DIGITIZER	43
-#define MXT_SPT_MESSAGECOUNT	44
+#define MXT_DEBUG_DIAGNOSTIC_T37	37
+#define MXT_GEN_MESSAGE_T5		5
+#define MXT_GEN_COMMAND_T6		6
+#define MXT_GEN_POWER_T7		7
+#define MXT_GEN_ACQUIRE_T8		8
+#define MXT_GEN_DATASOURCE_T53		53
+#define MXT_TOUCH_MULTI_T9		9
+#define MXT_TOUCH_KEYARRAY_T15		15
+#define MXT_TOUCH_PROXIMITY_T23		23
+#define MXT_TOUCH_PROXKEY_T52		52
+#define MXT_PROCI_GRIPFACE_T20		20
+#define MXT_PROCG_NOISE_T22		22
+#define MXT_PROCI_ONETOUCH_T24		24
+#define MXT_PROCI_TWOTOUCH_T27		27
+#define MXT_PROCI_GRIP_T40		40
+#define MXT_PROCI_PALM_T41		41
+#define MXT_PROCI_TOUCHSUPPRESSION_T42	42
+#define MXT_PROCI_STYLUS_T47		47
+#define MXT_PROCG_NOISESUPPRESSION_T48	48
+#define MXT_SPT_COMMSCONFIG_T18		18
+#define MXT_SPT_GPIOPWM_T19		19
+#define MXT_SPT_SELFTEST_T25		25
+#define MXT_SPT_CTECONFIG_T28		28
+#define MXT_SPT_USERDATA_T38		38
+#define MXT_SPT_DIGITIZER_T43		43
+#define MXT_SPT_MESSAGECOUNT_T44	44
+#define MXT_SPT_CTECONFIG_T46		46
 
-/* MXT_GEN_COMMAND field */
+/* MXT_GEN_COMMAND_T6 field */
 #define MXT_COMMAND_RESET	0
 #define MXT_COMMAND_BACKUPNV	1
 #define MXT_COMMAND_CALIBRATE	2
 #define MXT_COMMAND_REPORTALL	3
 #define MXT_COMMAND_DIAGNOSTIC	5
 
-/* MXT_GEN_POWER field */
+/* MXT_GEN_POWER_T7 field */
 #define MXT_POWER_IDLEACQINT	0
 #define MXT_POWER_ACTVACQINT	1
 #define MXT_POWER_ACTV2IDLETO	2
 
-/* MXT_GEN_ACQUIRE field */
+/* MXT_GEN_ACQUIRE_T8 field */
 #define MXT_ACQUIRE_CHRGTIME	0
 #define MXT_ACQUIRE_TCHDRIFT	2
 #define MXT_ACQUIRE_DRIFTST	3
@@ -91,7 +97,7 @@
 #define MXT_ACQUIRE_ATCHCALST	6
 #define MXT_ACQUIRE_ATCHCALSTHR	7
 
-/* MXT_TOUCH_MULTI field */
+/* MXT_TOUCH_MULTI_T9 field */
 #define MXT_TOUCH_CTRL		0
 #define MXT_TOUCH_XORIGIN	1
 #define MXT_TOUCH_YORIGIN	2
@@ -121,7 +127,7 @@
 #define MXT_TOUCH_YEDGEDIST	29
 #define MXT_TOUCH_JUMPLIMIT	30
 
-/* MXT_PROCI_GRIPFACE field */
+/* MXT_PROCI_GRIPFACE_T20 field */
 #define MXT_GRIPFACE_CTRL	0
 #define MXT_GRIPFACE_XLOGRIP	1
 #define MXT_GRIPFACE_XHIGRIP	2
@@ -151,11 +157,11 @@
 #define MXT_NOISE_FREQ4		15
 #define MXT_NOISE_IDLEGCAFVALID	16
 
-/* MXT_SPT_COMMSCONFIG */
+/* MXT_SPT_COMMSCONFIG_T18 */
 #define MXT_COMMS_CTRL		0
 #define MXT_COMMS_CMD		1
 
-/* MXT_SPT_CTECONFIG field */
+/* MXT_SPT_CTECONFIG_T28 field */
 #define MXT_CTE_CTRL		0
 #define MXT_CTE_CMD		1
 #define MXT_CTE_MODE		2
@@ -166,7 +172,7 @@
 #define MXT_VOLTAGE_DEFAULT	2700000
 #define MXT_VOLTAGE_STEP	10000
 
-/* Define for MXT_GEN_COMMAND */
+/* Define for MXT_GEN_COMMAND_T6 */
 #define MXT_BOOT_VALUE		0xa5
 #define MXT_BACKUP_VALUE	0x55
 #define MXT_BACKUP_TIME		25	/* msec */
@@ -256,24 +262,31 @@
 static bool mxt_object_readable(unsigned int type)
 {
 	switch (type) {
-	case MXT_GEN_MESSAGE:
-	case MXT_GEN_COMMAND:
-	case MXT_GEN_POWER:
-	case MXT_GEN_ACQUIRE:
-	case MXT_TOUCH_MULTI:
-	case MXT_TOUCH_KEYARRAY:
-	case MXT_TOUCH_PROXIMITY:
-	case MXT_PROCI_GRIPFACE:
-	case MXT_PROCG_NOISE:
-	case MXT_PROCI_ONETOUCH:
-	case MXT_PROCI_TWOTOUCH:
-	case MXT_PROCI_GRIP:
-	case MXT_PROCI_PALM:
-	case MXT_SPT_COMMSCONFIG:
-	case MXT_SPT_GPIOPWM:
-	case MXT_SPT_SELFTEST:
-	case MXT_SPT_CTECONFIG:
-	case MXT_SPT_USERDATA:
+	case MXT_GEN_MESSAGE_T5:
+	case MXT_GEN_COMMAND_T6:
+	case MXT_GEN_POWER_T7:
+	case MXT_GEN_ACQUIRE_T8:
+	case MXT_GEN_DATASOURCE_T53:
+	case MXT_TOUCH_MULTI_T9:
+	case MXT_TOUCH_KEYARRAY_T15:
+	case MXT_TOUCH_PROXIMITY_T23:
+	case MXT_TOUCH_PROXKEY_T52:
+	case MXT_PROCI_GRIPFACE_T20:
+	case MXT_PROCG_NOISE_T22:
+	case MXT_PROCI_ONETOUCH_T24:
+	case MXT_PROCI_TWOTOUCH_T27:
+	case MXT_PROCI_GRIP_T40:
+	case MXT_PROCI_PALM_T41:
+	case MXT_PROCI_TOUCHSUPPRESSION_T42:
+	case MXT_PROCI_STYLUS_T47:
+	case MXT_PROCG_NOISESUPPRESSION_T48:
+	case MXT_SPT_COMMSCONFIG_T18:
+	case MXT_SPT_GPIOPWM_T19:
+	case MXT_SPT_SELFTEST_T25:
+	case MXT_SPT_CTECONFIG_T28:
+	case MXT_SPT_USERDATA_T38:
+	case MXT_SPT_DIGITIZER_T43:
+	case MXT_SPT_CTECONFIG_T46:
 		return true;
 	default:
 		return false;
@@ -283,21 +296,28 @@
 static bool mxt_object_writable(unsigned int type)
 {
 	switch (type) {
-	case MXT_GEN_COMMAND:
-	case MXT_GEN_POWER:
-	case MXT_GEN_ACQUIRE:
-	case MXT_TOUCH_MULTI:
-	case MXT_TOUCH_KEYARRAY:
-	case MXT_TOUCH_PROXIMITY:
-	case MXT_PROCI_GRIPFACE:
-	case MXT_PROCG_NOISE:
-	case MXT_PROCI_ONETOUCH:
-	case MXT_PROCI_TWOTOUCH:
-	case MXT_PROCI_GRIP:
-	case MXT_PROCI_PALM:
-	case MXT_SPT_GPIOPWM:
-	case MXT_SPT_SELFTEST:
-	case MXT_SPT_CTECONFIG:
+	case MXT_GEN_COMMAND_T6:
+	case MXT_GEN_POWER_T7:
+	case MXT_GEN_ACQUIRE_T8:
+	case MXT_TOUCH_MULTI_T9:
+	case MXT_TOUCH_KEYARRAY_T15:
+	case MXT_TOUCH_PROXIMITY_T23:
+	case MXT_TOUCH_PROXKEY_T52:
+	case MXT_PROCI_GRIPFACE_T20:
+	case MXT_PROCG_NOISE_T22:
+	case MXT_PROCI_ONETOUCH_T24:
+	case MXT_PROCI_TWOTOUCH_T27:
+	case MXT_PROCI_GRIP_T40:
+	case MXT_PROCI_PALM_T41:
+	case MXT_PROCI_TOUCHSUPPRESSION_T42:
+	case MXT_PROCI_STYLUS_T47:
+	case MXT_PROCG_NOISESUPPRESSION_T48:
+	case MXT_SPT_COMMSCONFIG_T18:
+	case MXT_SPT_GPIOPWM_T19:
+	case MXT_SPT_SELFTEST_T25:
+	case MXT_SPT_CTECONFIG_T28:
+	case MXT_SPT_DIGITIZER_T43:
+	case MXT_SPT_CTECONFIG_T46:
 		return true;
 	default:
 		return false;
@@ -455,7 +475,7 @@
 	struct mxt_object *object;
 	u16 reg;
 
-	object = mxt_get_object(data, MXT_GEN_MESSAGE);
+	object = mxt_get_object(data, MXT_GEN_MESSAGE_T5);
 	if (!object)
 		return -EINVAL;
 
@@ -597,8 +617,8 @@
 
 		reportid = message.reportid;
 
-		/* whether reportid is thing of MXT_TOUCH_MULTI */
-		object = mxt_get_object(data, MXT_TOUCH_MULTI);
+		/* whether reportid is thing of MXT_TOUCH_MULTI_T9 */
+		object = mxt_get_object(data, MXT_TOUCH_MULTI_T9);
 		if (!object)
 			goto end;
 
@@ -635,7 +655,9 @@
 		if (!mxt_object_writable(object->type))
 			continue;
 
-		for (j = 0; j < object->size + 1; j++) {
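+		/*
+		 * An object may have several instances; the platform config
+		 * data lays them out back to back, so walk all of them.
+		 */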
+		for (j = 0;
+		     j < (object->size + 1) * (object->instances + 1);
+		     j++) {
 			config_offset = index + j;
 			if (config_offset > pdata->config_length) {
 				dev_err(dev, "Not enough config data!\n");
@@ -644,7 +666,7 @@
 			mxt_write_object(data, object->type, j,
 					 pdata->config[config_offset]);
 		}
-		index += object->size + 1;
+		index += (object->size + 1) * (object->instances + 1);
 	}
 
 	return 0;
@@ -678,31 +700,31 @@
 	u8 voltage;
 
 	/* Set touchscreen lines */
-	mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_XSIZE,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE,
 			pdata->x_line);
-	mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_YSIZE,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE,
 			pdata->y_line);
 
 	/* Set touchscreen orient */
-	mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_ORIENT,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT,
 			pdata->orient);
 
 	/* Set touchscreen burst length */
-	mxt_write_object(data, MXT_TOUCH_MULTI,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
 			MXT_TOUCH_BLEN, pdata->blen);
 
 	/* Set touchscreen threshold */
-	mxt_write_object(data, MXT_TOUCH_MULTI,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
 			MXT_TOUCH_TCHTHR, pdata->threshold);
 
 	/* Set touchscreen resolution */
-	mxt_write_object(data, MXT_TOUCH_MULTI,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
 			MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
 			MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
-	mxt_write_object(data, MXT_TOUCH_MULTI,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
 			MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI,
+	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
 			MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
 
 	/* Set touchscreen voltage */
@@ -715,7 +737,7 @@
 			voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) /
 				MXT_VOLTAGE_STEP;
 
-		mxt_write_object(data, MXT_SPT_CTECONFIG,
+		mxt_write_object(data, MXT_SPT_CTECONFIG_T28,
 				MXT_CTE_VOLTAGE, voltage);
 	}
 }
@@ -819,13 +841,13 @@
 	mxt_handle_pdata(data);
 
 	/* Backup to memory */
-	mxt_write_object(data, MXT_GEN_COMMAND,
+	mxt_write_object(data, MXT_GEN_COMMAND_T6,
 			MXT_COMMAND_BACKUPNV,
 			MXT_BACKUP_VALUE);
 	msleep(MXT_BACKUP_TIME);
 
 	/* Soft reset */
-	mxt_write_object(data, MXT_GEN_COMMAND,
+	mxt_write_object(data, MXT_GEN_COMMAND_T6,
 			MXT_COMMAND_RESET, 1);
 	msleep(MXT_RESET_TIME);
 
@@ -921,7 +943,7 @@
 	}
 
 	/* Change to the bootloader mode */
-	mxt_write_object(data, MXT_GEN_COMMAND,
+	mxt_write_object(data, MXT_GEN_COMMAND_T6,
 			MXT_COMMAND_RESET, MXT_BOOT_VALUE);
 	msleep(MXT_RESET_TIME);
 
@@ -1027,14 +1049,14 @@
 {
 	/* Touch enable */
 	mxt_write_object(data,
-			MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0x83);
+			MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0x83);
 }
 
 static void mxt_stop(struct mxt_data *data)
 {
 	/* Touch disable */
 	mxt_write_object(data,
-			MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0);
+			MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0);
 }
 
 static int mxt_input_open(struct input_dev *dev)
@@ -1182,7 +1204,7 @@
 	struct input_dev *input_dev = data->input_dev;
 
 	/* Soft reset */
-	mxt_write_object(data, MXT_GEN_COMMAND,
+	mxt_write_object(data, MXT_GEN_COMMAND_T6,
 			MXT_COMMAND_RESET, 1);
 
 	msleep(MXT_RESET_TIME);
diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
index a93c5c2..d8815c5 100644
--- a/drivers/input/touchscreen/cy8ctmg110_ts.c
+++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
@@ -84,9 +84,9 @@
 	memcpy(i2c_data + 1, value, len);
 
 	ret = i2c_master_send(client, i2c_data, len + 1);
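+	/* i2c_master_send() returns the byte count sent or a negative errno */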
-	if (ret != 1) {
+	if (ret != len + 1) {
 		dev_err(&client->dev, "i2c write data cmd failed\n");
-		return ret ? ret : -EIO;
+		return ret < 0 ? ret : -EIO;
 	}
 
 	return 0;
@@ -193,6 +193,8 @@
 
 	ts->client = client;
 	ts->input = input_dev;
+	ts->reset_pin = pdata->reset_pin;
+	ts->irq_pin = pdata->irq_pin;
 
 	snprintf(ts->phys, sizeof(ts->phys),
 		 "%s/input0", dev_name(&client->dev));
@@ -328,7 +330,7 @@
 	return 0;
 }
 
-static struct i2c_device_id cy8ctmg110_idtable[] = {
+static const struct i2c_device_id cy8ctmg110_idtable[] = {
 	{ CY8CTMG110_DRIVER_NAME, 1 },
 	{ }
 };
diff --git a/drivers/input/touchscreen/intel-mid-touch.c b/drivers/input/touchscreen/intel-mid-touch.c
index 66c96bf..3276952 100644
--- a/drivers/input/touchscreen/intel-mid-touch.c
+++ b/drivers/input/touchscreen/intel-mid-touch.c
@@ -448,15 +448,11 @@
  */
 static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev)
 {
-	int err, i, found;
+	int found = 0;
+	int err, i;
 	u8 r8;
 
-	found = -1;
-
 	for (i = 0; i < MRSTOUCH_MAX_CHANNELS; i++) {
-		if (found >= 0)
-			break;
-
 		err = intel_scu_ipc_ioread8(PMICADDR0 + i, &r8);
 		if (err)
 			return err;
@@ -466,16 +462,15 @@
 			break;
 		}
 	}
-	if (found < 0)
-		return 0;
 
 	if (tsdev->vendor == PMIC_VENDOR_FS) {
-		if (found && found > (MRSTOUCH_MAX_CHANNELS - 18))
+		if (found > MRSTOUCH_MAX_CHANNELS - 18)
 			return -ENOSPC;
 	} else {
-		if (found && found > (MRSTOUCH_MAX_CHANNELS - 4))
+		if (found > MRSTOUCH_MAX_CHANNELS - 4)
 			return -ENOSPC;
 	}
+
 	return found;
 }
 
diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c
index 3242e70..e966c29 100644
--- a/drivers/input/touchscreen/mainstone-wm97xx.c
+++ b/drivers/input/touchscreen/mainstone-wm97xx.c
@@ -157,9 +157,9 @@
 			x, y, p);
 
 		/* are samples valid */
-		if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X ||
-		    (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y ||
-		    (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES)
+		if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X ||
+		    (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y ||
+		    (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES)
 			goto up;
 
 		/* coordinate is good */
diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c
index 22a3411..089b0a0 100644
--- a/drivers/input/touchscreen/tnetv107x-ts.c
+++ b/drivers/input/touchscreen/tnetv107x-ts.c
@@ -393,5 +393,5 @@
 
 MODULE_AUTHOR("Cyril Chemparathy");
 MODULE_DESCRIPTION("TNETV107X Touchscreen Driver");
-MODULE_ALIAS("platform: tnetv107x-ts");
+MODULE_ALIAS("platform:tnetv107x-ts");
 MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/wm9705.c b/drivers/input/touchscreen/wm9705.c
index 98e6117..adc13a5 100644
--- a/drivers/input/touchscreen/wm9705.c
+++ b/drivers/input/touchscreen/wm9705.c
@@ -215,8 +215,9 @@
 static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
 {
 	int timeout = 5 * delay;
+	bool wants_pen = adcsel & WM97XX_PEN_DOWN;
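+	/*
+	 * Callers set WM97XX_PEN_DOWN in adcsel when a sample is only
+	 * valid while the pen is down; other samples (e.g. battery
+	 * channels) may be taken with the pen up.
+	 */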
 
-	if (!wm->pen_probably_down) {
+	if (wants_pen && !wm->pen_probably_down) {
 		u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
 		if (!(data & WM97XX_PEN_DOWN))
 			return RC_PENUP;
@@ -224,13 +225,10 @@
 	}
 
 	/* set up digitiser */
-	if (adcsel & 0x8000)
-		adcsel = ((adcsel & 0x7fff) + 3) << 12;
-
 	if (wm->mach_ops && wm->mach_ops->pre_sample)
 		wm->mach_ops->pre_sample(adcsel);
-	wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1,
-			 adcsel | WM97XX_POLL | WM97XX_DELAY(delay));
+	wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK)
+				| WM97XX_POLL | WM97XX_DELAY(delay));
 
 	/* wait 3 AC97 time slots + delay for conversion */
 	poll_delay(delay);
@@ -256,13 +254,14 @@
 		wm->mach_ops->post_sample(adcsel);
 
 	/* check we have correct sample */
-	if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) {
-		dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
-		*sample & WM97XX_ADCSEL_MASK);
+	if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+		dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+			adcsel & WM97XX_ADCSEL_MASK,
+			*sample & WM97XX_ADCSEL_MASK);
 		return RC_PENUP;
 	}
 
-	if (!(*sample & WM97XX_PEN_DOWN)) {
+	if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
 		wm->pen_probably_down = 0;
 		return RC_PENUP;
 	}
@@ -277,14 +276,14 @@
 {
 	int rc;
 
-	rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X, &data->x);
+	rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x);
 	if (rc != RC_VALID)
 		return rc;
-	rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y);
+	rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y);
 	if (rc != RC_VALID)
 		return rc;
 	if (pil) {
-		rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES, &data->p);
+		rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p);
 		if (rc != RC_VALID)
 			return rc;
 	} else
diff --git a/drivers/input/touchscreen/wm9712.c b/drivers/input/touchscreen/wm9712.c
index 2bc2fb8..6e743e3 100644
--- a/drivers/input/touchscreen/wm9712.c
+++ b/drivers/input/touchscreen/wm9712.c
@@ -255,8 +255,9 @@
 static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
 {
 	int timeout = 5 * delay;
+	bool wants_pen = adcsel & WM97XX_PEN_DOWN;
 
-	if (!wm->pen_probably_down) {
+	if (wants_pen && !wm->pen_probably_down) {
 		u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
 		if (!(data & WM97XX_PEN_DOWN))
 			return RC_PENUP;
@@ -264,13 +265,10 @@
 	}
 
 	/* set up digitiser */
-	if (adcsel & 0x8000)
-		adcsel = ((adcsel & 0x7fff) + 3) << 12;
-
 	if (wm->mach_ops && wm->mach_ops->pre_sample)
 		wm->mach_ops->pre_sample(adcsel);
-	wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1,
-			 adcsel | WM97XX_POLL | WM97XX_DELAY(delay));
+	wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK)
+				| WM97XX_POLL | WM97XX_DELAY(delay));
 
 	/* wait 3 AC97 time slots + delay for conversion */
 	poll_delay(delay);
@@ -296,13 +294,14 @@
 		wm->mach_ops->post_sample(adcsel);
 
 	/* check we have correct sample */
-	if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) {
-		dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
-		*sample & WM97XX_ADCSEL_MASK);
+	if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+		dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+			adcsel & WM97XX_ADCSEL_MASK,
+			*sample & WM97XX_ADCSEL_MASK);
 		return RC_PENUP;
 	}
 
-	if (!(*sample & WM97XX_PEN_DOWN)) {
+	if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
 		wm->pen_probably_down = 0;
 		return RC_PENUP;
 	}
@@ -387,16 +386,18 @@
 		if (rc != RC_VALID)
 			return rc;
 	} else {
-		rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X, &data->x);
+		rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN,
+					&data->x);
 		if (rc != RC_VALID)
 			return rc;
 
-		rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y);
+		rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN,
+					&data->y);
 		if (rc != RC_VALID)
 			return rc;
 
 		if (pil && !five_wire) {
-			rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES,
+			rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN,
 						&data->p);
 			if (rc != RC_VALID)
 				return rc;
diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c
index 73ec995..7405353 100644
--- a/drivers/input/touchscreen/wm9713.c
+++ b/drivers/input/touchscreen/wm9713.c
@@ -261,8 +261,9 @@
 {
 	u16 dig1;
 	int timeout = 5 * delay;
+	bool wants_pen = adcsel & WM97XX_PEN_DOWN;
 
-	if (!wm->pen_probably_down) {
+	if (wants_pen && !wm->pen_probably_down) {
 		u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
 		if (!(data & WM97XX_PEN_DOWN))
 			return RC_PENUP;
@@ -270,15 +271,14 @@
 	}
 
 	/* set up digitiser */
-	if (adcsel & 0x8000)
-		adcsel = 1 << ((adcsel & 0x7fff) + 3);
-
 	dig1 = wm97xx_reg_read(wm, AC97_WM9713_DIG1);
 	dig1 &= ~WM9713_ADCSEL_MASK;
+	/* WM97XX_ADCSEL_* channels need to be converted to WM9713 format */
+	dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12);
 
 	if (wm->mach_ops && wm->mach_ops->pre_sample)
 		wm->mach_ops->pre_sample(adcsel);
-	wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | adcsel | WM9713_POLL);
+	wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | WM9713_POLL);
 
 	/* wait 3 AC97 time slots + delay for conversion */
 	poll_delay(delay);
@@ -304,13 +304,14 @@
 		wm->mach_ops->post_sample(adcsel);
 
 	/* check we have correct sample */
-	if ((*sample & WM97XX_ADCSRC_MASK) != ffs(adcsel >> 1) << 12) {
-		dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
-			*sample & WM97XX_ADCSRC_MASK);
+	if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+		dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+			adcsel & WM97XX_ADCSEL_MASK,
+			*sample & WM97XX_ADCSEL_MASK);
 		return RC_PENUP;
 	}
 
-	if (!(*sample & WM97XX_PEN_DOWN)) {
+	if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
 		wm->pen_probably_down = 0;
 		return RC_PENUP;
 	}
@@ -400,14 +401,14 @@
 		if (rc != RC_VALID)
 			return rc;
 	} else {
-		rc = wm9713_poll_sample(wm, WM9713_ADCSEL_X, &data->x);
+		rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x);
 		if (rc != RC_VALID)
 			return rc;
-		rc = wm9713_poll_sample(wm, WM9713_ADCSEL_Y, &data->y);
+		rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y);
 		if (rc != RC_VALID)
 			return rc;
 		if (pil) {
-			rc = wm9713_poll_sample(wm, WM9713_ADCSEL_PRES,
+			rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN,
 						&data->p);
 			if (rc != RC_VALID)
 				return rc;
diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c
index 5b0f15e..f6328c0 100644
--- a/drivers/input/touchscreen/zylonite-wm97xx.c
+++ b/drivers/input/touchscreen/zylonite-wm97xx.c
@@ -122,9 +122,9 @@
 			x, y, p);
 
 		/* are samples valid */
-		if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X ||
-		    (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y ||
-		    (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES)
+		if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X ||
+		    (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y ||
+		    (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES)
 			goto up;
 
 		/* coordinate is good */
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 48e9cc0..1f73d7f 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -2532,6 +2532,9 @@
 
 	/* Setup the generic properties */
 	dev->flags = IFF_NOARP|IFF_POINTOPOINT;
+
+	/* isdn prepends a header in the tx path, can't share skbs */
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->header_ops = NULL;
 	dev->netdev_ops = &isdn_netdev_ops;
 
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 574b09a..0dc6546 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -29,7 +29,6 @@
 #include "md.h"
 #include "bitmap.h"
 
-#include <linux/dm-dirty-log.h>
 /* debug macros */
 
 #define DEBUG 0
@@ -775,10 +774,8 @@
  * 0 or page 1
  */
 static inline struct page *filemap_get_page(struct bitmap *bitmap,
-					unsigned long chunk)
+					    unsigned long chunk)
 {
-	if (bitmap->filemap == NULL)
-		return NULL;
 	if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
 		return NULL;
 	return bitmap->filemap[file_page_index(bitmap, chunk)
@@ -878,28 +875,19 @@
 static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
 				enum bitmap_page_attr attr)
 {
-	if (page)
-		__set_bit((page->index<<2) + attr, bitmap->filemap_attr);
-	else
-		__set_bit(attr, &bitmap->logattrs);
+	__set_bit((page->index<<2) + attr, bitmap->filemap_attr);
 }
 
 static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
 				enum bitmap_page_attr attr)
 {
-	if (page)
-		__clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
-	else
-		__clear_bit(attr, &bitmap->logattrs);
+	__clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
 }
 
 static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
 					   enum bitmap_page_attr attr)
 {
-	if (page)
-		return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
-	else
-		return test_bit(attr, &bitmap->logattrs);
+	return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
 }
 
 /*
@@ -912,30 +900,26 @@
 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 {
 	unsigned long bit;
-	struct page *page = NULL;
+	struct page *page;
 	void *kaddr;
 	unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
 
-	if (!bitmap->filemap) {
-		struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
-		if (log)
-			log->type->mark_region(log, chunk);
-	} else {
+	if (!bitmap->filemap)
+		return;
 
-		page = filemap_get_page(bitmap, chunk);
-		if (!page)
-			return;
-		bit = file_page_offset(bitmap, chunk);
+	page = filemap_get_page(bitmap, chunk);
+	if (!page)
+		return;
+	bit = file_page_offset(bitmap, chunk);
 
-		/* set the bit */
-		kaddr = kmap_atomic(page, KM_USER0);
-		if (bitmap->flags & BITMAP_HOSTENDIAN)
-			set_bit(bit, kaddr);
-		else
-			__test_and_set_bit_le(bit, kaddr);
-		kunmap_atomic(kaddr, KM_USER0);
-		PRINTK("set file bit %lu page %lu\n", bit, page->index);
-	}
+	/* set the bit */
+	kaddr = kmap_atomic(page, KM_USER0);
+	if (bitmap->flags & BITMAP_HOSTENDIAN)
+		set_bit(bit, kaddr);
+	else
+		__set_bit_le(bit, kaddr);
+	kunmap_atomic(kaddr, KM_USER0);
+	PRINTK("set file bit %lu page %lu\n", bit, page->index);
 	/* record page number so it gets flushed to disk when unplug occurs */
 	set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
 }
@@ -952,16 +936,6 @@
 
 	if (!bitmap)
 		return;
-	if (!bitmap->filemap) {
-		/* Must be using a dirty_log */
-		struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
-		dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs);
-		need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs);
-		if (dirty || need_write)
-			if (log->type->flush(log))
-				bitmap->flags |= BITMAP_WRITE_ERROR;
-		goto out;
-	}
 
 	/* look at each page to see if there are any set bits that need to be
 	 * flushed out to disk */
@@ -990,7 +964,6 @@
 		else
 			md_super_wait(bitmap->mddev);
 	}
-out:
 	if (bitmap->flags & BITMAP_WRITE_ERROR)
 		bitmap_file_kick(bitmap);
 }
@@ -1199,7 +1172,6 @@
 	struct page *page = NULL, *lastpage = NULL;
 	sector_t blocks;
 	void *paddr;
-	struct dm_dirty_log *log = mddev->bitmap_info.log;
 
 	/* Use a mutex to guard daemon_work against
 	 * bitmap_destroy.
@@ -1224,12 +1196,11 @@
 	spin_lock_irqsave(&bitmap->lock, flags);
 	for (j = 0; j < bitmap->chunks; j++) {
 		bitmap_counter_t *bmc;
-		if (!bitmap->filemap) {
-			if (!log)
-				/* error or shutdown */
-				break;
-		} else
-			page = filemap_get_page(bitmap, j);
+		if (!bitmap->filemap)
+			/* error or shutdown */
+			break;
+
+		page = filemap_get_page(bitmap, j);
 
 		if (page != lastpage) {
 			/* skip this page unless it's marked as needing cleaning */
@@ -1298,17 +1269,16 @@
 						  -1);
 
 				/* clear the bit */
-				if (page) {
-					paddr = kmap_atomic(page, KM_USER0);
-					if (bitmap->flags & BITMAP_HOSTENDIAN)
-						clear_bit(file_page_offset(bitmap, j),
-							  paddr);
-					else
-						__test_and_clear_bit_le(file_page_offset(bitmap, j),
-							       paddr);
-					kunmap_atomic(paddr, KM_USER0);
-				} else
-					log->type->clear_region(log, j);
+				paddr = kmap_atomic(page, KM_USER0);
+				if (bitmap->flags & BITMAP_HOSTENDIAN)
+					clear_bit(file_page_offset(bitmap, j),
+						  paddr);
+				else
+					__clear_bit_le(
+							file_page_offset(bitmap,
+									 j),
+							paddr);
+				kunmap_atomic(paddr, KM_USER0);
 			}
 		} else
 			j |= PAGE_COUNTER_MASK;
@@ -1316,16 +1286,12 @@
 	spin_unlock_irqrestore(&bitmap->lock, flags);
 
 	/* now sync the final page */
-	if (lastpage != NULL || log != NULL) {
+	if (lastpage != NULL) {
 		spin_lock_irqsave(&bitmap->lock, flags);
 		if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
 			clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
 			spin_unlock_irqrestore(&bitmap->lock, flags);
-			if (lastpage)
-				write_page(bitmap, lastpage, 0);
-			else
-				if (log->type->flush(log))
-					bitmap->flags |= BITMAP_WRITE_ERROR;
+			write_page(bitmap, lastpage, 0);
 		} else {
 			set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
 			spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1767,12 +1733,10 @@
 	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
 
 	if (!file
-	    && !mddev->bitmap_info.offset
-	    && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */
+	    && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
 		return 0;
 
 	BUG_ON(file && mddev->bitmap_info.offset);
-	BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log);
 
 	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
 	if (!bitmap)
@@ -1863,6 +1827,7 @@
 int bitmap_load(mddev_t *mddev)
 {
 	int err = 0;
+	sector_t start = 0;
 	sector_t sector = 0;
 	struct bitmap *bitmap = mddev->bitmap;
 
@@ -1881,24 +1846,14 @@
 	}
 	bitmap_close_sync(bitmap);
 
-	if (mddev->bitmap_info.log) {
-		unsigned long i;
-		struct dm_dirty_log *log = mddev->bitmap_info.log;
-		for (i = 0; i < bitmap->chunks; i++)
-			if (!log->type->in_sync(log, i, 1))
-				bitmap_set_memory_bits(bitmap,
-						       (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
-						       1);
-	} else {
-		sector_t start = 0;
-		if (mddev->degraded == 0
-		    || bitmap->events_cleared == mddev->events)
-			/* no need to keep dirty bits to optimise a
-			 * re-add of a missing device */
-			start = mddev->recovery_cp;
+	if (mddev->degraded == 0
+	    || bitmap->events_cleared == mddev->events)
+		/* no need to keep dirty bits to optimise a
+		 * re-add of a missing device */
+		start = mddev->recovery_cp;
 
-		err = bitmap_init_from_disk(bitmap, start);
-	}
+	err = bitmap_init_from_disk(bitmap, start);
+
 	if (err)
 		goto out;
 
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index b2a127e..a28f2e5 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -212,10 +212,6 @@
 	unsigned long file_pages; /* number of pages in the file */
 	int last_page_size; /* bytes in the last page */
 
-	unsigned long logattrs; /* used when filemap_attr doesn't exist
-				 * because we are working with a dirty_log
-				 */
-
 	unsigned long flags;
 
 	int allclean;
@@ -237,7 +233,6 @@
 	wait_queue_head_t behind_wait;
 
 	struct sysfs_dirent *sysfs_can_clear;
-
 };
 
 /* the bitmap API */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dfc9425..8e221a2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -215,6 +215,55 @@
 }
 EXPORT_SYMBOL_GPL(bio_clone_mddev);
 
+void md_trim_bio(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	bio->bi_sector += offset;
+	bio->bi_size = size;
+	offset <<= 9;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	while (bio->bi_idx < bio->bi_vcnt &&
+	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+		/* remove this whole bio_vec */
+		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+		bio->bi_idx++;
+	}
+	if (bio->bi_idx < bio->bi_vcnt) {
+		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+	}
+	/* avoid any complications with bi_idx being non-zero */
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+EXPORT_SYMBOL_GPL(md_trim_bio);
+
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
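
md_trim_bio() above narrows an already-cloned bio to a sub-range without
reallocating it.  A minimal worked example, not part of the patch, with
hypothetical numbers (the raid1 changes later in this merge call it the
same way on a freshly cloned read bio):

	struct bio *clone = bio_clone_mddev(bio, GFP_NOIO, mddev);
	/* suppose the clone covers sectors 1000..1015, bi_size == 16 << 9 */
	md_trim_bio(clone, 4, 8);	/* offset and size in sectors */
	/* now clone->bi_sector == 1004, clone->bi_size == 8 << 9; leading
	 * bio_vec bytes are skipped and the tail is shortened so the clone
	 * describes exactly sectors 1004..1011 */
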
@@ -757,6 +806,10 @@
 		rdev->sb_start = 0;
 		rdev->sectors = 0;
 	}
+	if (rdev->bb_page) {
+		put_page(rdev->bb_page);
+		rdev->bb_page = NULL;
+	}
 }
 
 
@@ -1025,7 +1078,7 @@
 	ret = -EINVAL;
 
 	bdevname(rdev->bdev, b);
-	sb = (mdp_super_t*)page_address(rdev->sb_page);
+	sb = page_address(rdev->sb_page);
 
 	if (sb->md_magic != MD_SB_MAGIC) {
 		printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
@@ -1054,6 +1107,7 @@
 	rdev->preferred_minor = sb->md_minor;
 	rdev->data_offset = 0;
 	rdev->sb_size = MD_SB_BYTES;
+	rdev->badblocks.shift = -1;
 
 	if (sb->level == LEVEL_MULTIPATH)
 		rdev->desc_nr = -1;
@@ -1064,7 +1118,7 @@
 		ret = 1;
 	} else {
 		__u64 ev1, ev2;
-		mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
+		mdp_super_t *refsb = page_address(refdev->sb_page);
 		if (!uuid_equal(refsb, sb)) {
 			printk(KERN_WARNING "md: %s has different UUID to %s\n",
 				b, bdevname(refdev->bdev,b2));
@@ -1099,7 +1153,7 @@
 static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	mdp_disk_t *desc;
-	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+	mdp_super_t *sb = page_address(rdev->sb_page);
 	__u64 ev1 = md_event(sb);
 
 	rdev->raid_disk = -1;
@@ -1230,7 +1284,7 @@
 
 	rdev->sb_size = MD_SB_BYTES;
 
-	sb = (mdp_super_t*)page_address(rdev->sb_page);
+	sb = page_address(rdev->sb_page);
 
 	memset(sb, 0, sizeof(*sb));
 
@@ -1395,6 +1449,8 @@
 	return cpu_to_le32(csum);
 }
 
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+			    int acknowledged);
 static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 {
 	struct mdp_superblock_1 *sb;
@@ -1435,7 +1491,7 @@
 	if (ret) return ret;
 
 
-	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+	sb = page_address(rdev->sb_page);
 
 	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
 	    sb->major_version != cpu_to_le32(1) ||
@@ -1473,12 +1529,52 @@
 	else
 		rdev->desc_nr = le32_to_cpu(sb->dev_number);
 
+	if (!rdev->bb_page) {
+		rdev->bb_page = alloc_page(GFP_KERNEL);
+		if (!rdev->bb_page)
+			return -ENOMEM;
+	}
+	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
+	    rdev->badblocks.count == 0) {
+		/* need to load the bad block list.
+		 * Currently we limit it to one page.
+		 */
+		s32 offset;
+		sector_t bb_sector;
+		u64 *bbp;
+		int i;
+		int sectors = le16_to_cpu(sb->bblog_size);
+		if (sectors > (PAGE_SIZE / 512))
+			return -EINVAL;
+		offset = le32_to_cpu(sb->bblog_offset);
+		if (offset == 0)
+			return -EINVAL;
+		bb_sector = (long long)offset;
+		if (!sync_page_io(rdev, bb_sector, sectors << 9,
+				  rdev->bb_page, READ, true))
+			return -EIO;
+		bbp = (u64 *)page_address(rdev->bb_page);
+		rdev->badblocks.shift = sb->bblog_shift;
+		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
+			u64 bb = le64_to_cpu(*bbp);
+			int count = bb & (0x3ff);
+			u64 sector = bb >> 10;
+			sector <<= sb->bblog_shift;
+			count <<= sb->bblog_shift;
+			if (bb + 1 == 0)
+				break;
+			if (md_set_badblocks(&rdev->badblocks,
+					     sector, count, 1) == 0)
+				return -EINVAL;
+		}
+	} else if (sb->bblog_offset == 0)
+		rdev->badblocks.shift = -1;
+
 	if (!refdev) {
 		ret = 1;
 	} else {
 		__u64 ev1, ev2;
-		struct mdp_superblock_1 *refsb = 
-			(struct mdp_superblock_1*)page_address(refdev->sb_page);
+		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
 
 		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
 		    sb->level != refsb->level ||
@@ -1513,7 +1609,7 @@
 
 static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
-	struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
 	__u64 ev1 = le64_to_cpu(sb->events);
 
 	rdev->raid_disk = -1;
@@ -1619,13 +1715,12 @@
 	int max_dev, i;
 	/* make rdev->sb match mddev and rdev data. */
 
-	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+	sb = page_address(rdev->sb_page);
 
 	sb->feature_map = 0;
 	sb->pad0 = 0;
 	sb->recovery_offset = cpu_to_le64(0);
 	memset(sb->pad1, 0, sizeof(sb->pad1));
-	memset(sb->pad2, 0, sizeof(sb->pad2));
 	memset(sb->pad3, 0, sizeof(sb->pad3));
 
 	sb->utime = cpu_to_le64((__u64)mddev->utime);
@@ -1665,6 +1760,40 @@
 		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
 	}
 
+	if (rdev->badblocks.count == 0)
+		/* Nothing to do for bad blocks */ ;
+	else if (sb->bblog_offset == 0)
+		/* Cannot record bad blocks on this device */
+		md_error(mddev, rdev);
+	else {
+		struct badblocks *bb = &rdev->badblocks;
+		u64 *bbp = (u64 *)page_address(rdev->bb_page);
+		u64 *p = bb->page;
+		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
+		if (bb->changed) {
+			unsigned seq;
+
+retry:
+			seq = read_seqbegin(&bb->lock);
+
+			memset(bbp, 0xff, PAGE_SIZE);
+
+			for (i = 0 ; i < bb->count ; i++) {
+				u64 internal_bb = *p++;
+				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
+						| BB_LEN(internal_bb));
+				*bbp++ = cpu_to_le64(store_bb);
+			}
+			if (read_seqretry(&bb->lock, seq))
+				goto retry;
+
+			bb->sector = (rdev->sb_start +
+				      (int)le32_to_cpu(sb->bblog_offset));
+			bb->size = le16_to_cpu(sb->bblog_size);
+			bb->changed = 0;
+		}
+	}
+
 	max_dev = 0;
 	list_for_each_entry(rdev2, &mddev->disks, same_set)
 		if (rdev2->desc_nr+1 > max_dev)
@@ -1724,7 +1853,7 @@
 			num_sectors = max_sectors;
 		rdev->sb_start = sb_start;
 	}
-	sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
+	sb = page_address(rdev->sb_page);
 	sb->data_size = cpu_to_le64(num_sectors);
 	sb->super_offset = rdev->sb_start;
 	sb->sb_csum = calc_sb_1_csum(sb);
@@ -1922,7 +2051,7 @@
 	bd_link_disk_holder(rdev->bdev, mddev->gendisk);
 
 	/* May as well allow recovery to be retried once */
-	mddev->recovery_disabled = 0;
+	mddev->recovery_disabled++;
 
 	return 0;
 
@@ -1953,6 +2082,9 @@
 	sysfs_remove_link(&rdev->kobj, "block");
 	sysfs_put(rdev->sysfs_state);
 	rdev->sysfs_state = NULL;
+	kfree(rdev->badblocks.page);
+	rdev->badblocks.count = 0;
+	rdev->badblocks.page = NULL;
 	/* We need to delay this, otherwise we can deadlock when
 	 * writing to 'remove' to "dev/state".  We also need
 	 * to delay it due to rcu usage.
@@ -2127,10 +2259,10 @@
 		printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
 		switch (major_version) {
 		case 0:
-			print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+			print_sb_90(page_address(rdev->sb_page));
 			break;
 		case 1:
-			print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+			print_sb_1(page_address(rdev->sb_page));
 			break;
 		}
 	} else
@@ -2194,6 +2326,7 @@
 	mdk_rdev_t *rdev;
 	int sync_req;
 	int nospares = 0;
+	int any_badblocks_changed = 0;
 
 repeat:
 	/* First make sure individual recovery_offsets are correct */
@@ -2208,8 +2341,18 @@
 	if (!mddev->persistent) {
 		clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
 		clear_bit(MD_CHANGE_DEVS, &mddev->flags);
-		if (!mddev->external)
+		if (!mddev->external) {
 			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+			list_for_each_entry(rdev, &mddev->disks, same_set) {
+				if (rdev->badblocks.changed) {
+					md_ack_all_badblocks(&rdev->badblocks);
+					md_error(mddev, rdev);
+				}
+				clear_bit(Blocked, &rdev->flags);
+				clear_bit(BlockedBadBlocks, &rdev->flags);
+				wake_up(&rdev->blocked_wait);
+			}
+		}
 		wake_up(&mddev->sb_wait);
 		return;
 	}
@@ -2265,6 +2408,14 @@
 		MD_BUG();
 		mddev->events --;
 	}
+
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		if (rdev->badblocks.changed)
+			any_badblocks_changed++;
+		if (test_bit(Faulty, &rdev->flags))
+			set_bit(FaultRecorded, &rdev->flags);
+	}
+
 	sync_sbs(mddev, nospares);
 	spin_unlock_irq(&mddev->write_lock);
 
@@ -2290,6 +2441,13 @@
 				bdevname(rdev->bdev,b),
 				(unsigned long long)rdev->sb_start);
 			rdev->sb_events = mddev->events;
+			if (rdev->badblocks.size) {
+				md_super_write(mddev, rdev,
+					       rdev->badblocks.sector,
+					       rdev->badblocks.size << 9,
+					       rdev->bb_page);
+				rdev->badblocks.size = 0;
+			}
 
 		} else
 			dprintk(")\n");
@@ -2313,6 +2471,15 @@
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		if (test_and_clear_bit(FaultRecorded, &rdev->flags))
+			clear_bit(Blocked, &rdev->flags);
+
+		if (any_badblocks_changed)
+			md_ack_all_badblocks(&rdev->badblocks);
+		clear_bit(BlockedBadBlocks, &rdev->flags);
+		wake_up(&rdev->blocked_wait);
+	}
 }
 
 /* words written to sysfs files may, or may not, be \n terminated.
@@ -2347,7 +2514,8 @@
 	char *sep = "";
 	size_t len = 0;
 
-	if (test_bit(Faulty, &rdev->flags)) {
+	if (test_bit(Faulty, &rdev->flags) ||
+	    rdev->badblocks.unacked_exist) {
 		len+= sprintf(page+len, "%sfaulty",sep);
 		sep = ",";
 	}
@@ -2359,7 +2527,8 @@
 		len += sprintf(page+len, "%swrite_mostly",sep);
 		sep = ",";
 	}
-	if (test_bit(Blocked, &rdev->flags)) {
+	if (test_bit(Blocked, &rdev->flags) ||
+	    rdev->badblocks.unacked_exist) {
 		len += sprintf(page+len, "%sblocked", sep);
 		sep = ",";
 	}
@@ -2368,6 +2537,10 @@
 		len += sprintf(page+len, "%sspare", sep);
 		sep = ",";
 	}
+	if (test_bit(WriteErrorSeen, &rdev->flags)) {
+		len += sprintf(page+len, "%swrite_error", sep);
+		sep = ",";
+	}
 	return len+sprintf(page+len, "\n");
 }
 
@@ -2375,13 +2548,15 @@
 state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
 	/* can write
-	 *  faulty  - simulates and error
+	 *  faulty  - simulates an error
 	 *  remove  - disconnects the device
 	 *  writemostly - sets write_mostly
 	 *  -writemostly - clears write_mostly
-	 *  blocked - sets the Blocked flag
-	 *  -blocked - clears the Blocked flag
+	 *  blocked - sets the Blocked flag
+	 *  -blocked - clears the Blocked flag and possibly simulates an error
 	 *  insync - sets Insync providing device isn't active
+	 *  write_error - sets WriteErrorSeen
+	 *  -write_error - clears WriteErrorSeen
 	 */
 	int err = -EINVAL;
 	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -2408,7 +2583,15 @@
 		set_bit(Blocked, &rdev->flags);
 		err = 0;
 	} else if (cmd_match(buf, "-blocked")) {
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    test_bit(BlockedBadBlocks, &rdev->flags)) {
+			/* metadata handler doesn't understand badblocks,
+			 * so we need to fail the device
+			 */
+			md_error(rdev->mddev, rdev);
+		}
 		clear_bit(Blocked, &rdev->flags);
+		clear_bit(BlockedBadBlocks, &rdev->flags);
 		wake_up(&rdev->blocked_wait);
 		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 		md_wakeup_thread(rdev->mddev->thread);
@@ -2417,6 +2600,12 @@
 	} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
 		set_bit(In_sync, &rdev->flags);
 		err = 0;
+	} else if (cmd_match(buf, "write_error")) {
+		set_bit(WriteErrorSeen, &rdev->flags);
+		err = 0;
+	} else if (cmd_match(buf, "-write_error")) {
+		clear_bit(WriteErrorSeen, &rdev->flags);
+		err = 0;
 	}
 	if (!err)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -2459,7 +2648,6 @@
 {
 	char *e;
 	int err;
-	char nm[20];
 	int slot = simple_strtoul(buf, &e, 10);
 	if (strncmp(buf, "none", 4)==0)
 		slot = -1;
@@ -2482,8 +2670,7 @@
 			hot_remove_disk(rdev->mddev, rdev->raid_disk);
 		if (err)
 			return err;
-		sprintf(nm, "rd%d", rdev->raid_disk);
-		sysfs_remove_link(&rdev->mddev->kobj, nm);
+		sysfs_unlink_rdev(rdev->mddev, rdev);
 		rdev->raid_disk = -1;
 		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 		md_wakeup_thread(rdev->mddev->thread);
@@ -2522,8 +2709,7 @@
 			return err;
 		} else
 			sysfs_notify_dirent_safe(rdev->sysfs_state);
-		sprintf(nm, "rd%d", rdev->raid_disk);
-		if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
+		if (sysfs_link_rdev(rdev->mddev, rdev))
 			/* failure here is OK */;
 		/* don't wakeup anyone, leave that to userspace. */
 	} else {
@@ -2712,6 +2898,39 @@
 static struct rdev_sysfs_entry rdev_recovery_start =
 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
 
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack);
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
+
+static ssize_t bb_show(mdk_rdev_t *rdev, char *page)
+{
+	return badblocks_show(&rdev->badblocks, page, 0);
+}
+static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+	int rv = badblocks_store(&rdev->badblocks, page, len, 0);
+	/* Maybe that ack was all we needed */
+	if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
+		wake_up(&rdev->blocked_wait);
+	return rv;
+}
+static struct rdev_sysfs_entry rdev_bad_blocks =
+__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
+
+
+static ssize_t ubb_show(mdk_rdev_t *rdev, char *page)
+{
+	return badblocks_show(&rdev->badblocks, page, 1);
+}
+static ssize_t ubb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+	return badblocks_store(&rdev->badblocks, page, len, 1);
+}
+static struct rdev_sysfs_entry rdev_unack_bad_blocks =
+__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
+
 static struct attribute *rdev_default_attrs[] = {
 	&rdev_state.attr,
 	&rdev_errors.attr,
@@ -2719,6 +2938,8 @@
 	&rdev_offset.attr,
 	&rdev_size.attr,
 	&rdev_recovery_start.attr,
+	&rdev_bad_blocks.attr,
+	&rdev_unack_bad_blocks.attr,
 	NULL,
 };
 static ssize_t
@@ -2782,7 +3003,7 @@
 	.default_attrs	= rdev_default_attrs,
 };
 
-void md_rdev_init(mdk_rdev_t *rdev)
+int md_rdev_init(mdk_rdev_t *rdev)
 {
 	rdev->desc_nr = -1;
 	rdev->saved_raid_disk = -1;
@@ -2792,12 +3013,27 @@
 	rdev->sb_events = 0;
 	rdev->last_read_error.tv_sec  = 0;
 	rdev->last_read_error.tv_nsec = 0;
+	rdev->sb_loaded = 0;
+	rdev->bb_page = NULL;
 	atomic_set(&rdev->nr_pending, 0);
 	atomic_set(&rdev->read_errors, 0);
 	atomic_set(&rdev->corrected_errors, 0);
 
 	INIT_LIST_HEAD(&rdev->same_set);
 	init_waitqueue_head(&rdev->blocked_wait);
+
+	/* Add space to store bad block list.
+	 * This reserves the space even on arrays where it cannot
+	 * be used - I wonder if that matters
+	 */
+	rdev->badblocks.count = 0;
+	rdev->badblocks.shift = 0;
+	rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	seqlock_init(&rdev->badblocks.lock);
+	if (rdev->badblocks.page == NULL)
+		return -ENOMEM;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(md_rdev_init);
 /*
@@ -2823,8 +3059,11 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	md_rdev_init(rdev);
-	if ((err = alloc_disk_sb(rdev)))
+	err = md_rdev_init(rdev);
+	if (err)
+		goto abort_free;
+	err = alloc_disk_sb(rdev);
+	if (err)
 		goto abort_free;
 
 	err = lock_rdev(rdev, newdev, super_format == -2);
@@ -2860,15 +3099,17 @@
 			goto abort_free;
 		}
 	}
+	if (super_format == -1)
+		/* hot-add for 0.90, or non-persistent: so no badblocks */
+		rdev->badblocks.shift = -1;
 
 	return rdev;
 
 abort_free:
-	if (rdev->sb_page) {
-		if (rdev->bdev)
-			unlock_rdev(rdev);
-		free_disk_sb(rdev);
-	}
+	if (rdev->bdev)
+		unlock_rdev(rdev);
+	free_disk_sb(rdev);
+	kfree(rdev->badblocks.page);
 	kfree(rdev);
 	return ERR_PTR(err);
 }
@@ -3149,15 +3390,13 @@
 	}
 
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
-		char nm[20];
 		if (rdev->raid_disk < 0)
 			continue;
 		if (rdev->new_raid_disk >= mddev->raid_disks)
 			rdev->new_raid_disk = -1;
 		if (rdev->new_raid_disk == rdev->raid_disk)
 			continue;
-		sprintf(nm, "rd%d", rdev->raid_disk);
-		sysfs_remove_link(&mddev->kobj, nm);
+		sysfs_unlink_rdev(mddev, rdev);
 	}
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
 		if (rdev->raid_disk < 0)
@@ -3168,11 +3407,10 @@
 		if (rdev->raid_disk < 0)
 			clear_bit(In_sync, &rdev->flags);
 		else {
-			char nm[20];
-			sprintf(nm, "rd%d", rdev->raid_disk);
-			if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
-				printk("md: cannot register %s for %s after level change\n",
-				       nm, mdname(mddev));
+			if (sysfs_link_rdev(mddev, rdev))
+				printk(KERN_WARNING "md: cannot register rd%d"
+				       " for %s after level change\n",
+				       rdev->raid_disk, mdname(mddev));
 		}
 	}
 
@@ -4504,7 +4742,8 @@
 	}
 
 	if (mddev->bio_set == NULL)
-		mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+		mddev->bio_set = bioset_create(BIO_POOL_SIZE,
+					       sizeof(mddev_t *));
 
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
@@ -4621,12 +4860,9 @@
 	smp_wmb();
 	mddev->ready = 1;
 	list_for_each_entry(rdev, &mddev->disks, same_set)
-		if (rdev->raid_disk >= 0) {
-			char nm[20];
-			sprintf(nm, "rd%d", rdev->raid_disk);
-			if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
+		if (rdev->raid_disk >= 0)
+			if (sysfs_link_rdev(mddev, rdev))
 				/* failure here is OK */;
-		}
 	
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	
@@ -4854,11 +5090,8 @@
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
 
 		list_for_each_entry(rdev, &mddev->disks, same_set)
-			if (rdev->raid_disk >= 0) {
-				char nm[20];
-				sprintf(nm, "rd%d", rdev->raid_disk);
-				sysfs_remove_link(&mddev->kobj, nm);
-			}
+			if (rdev->raid_disk >= 0)
+				sysfs_unlink_rdev(mddev, rdev);
 
 		set_capacity(disk, 0);
 		mutex_unlock(&mddev->open_mutex);
@@ -6198,18 +6431,7 @@
 	if (!rdev || test_bit(Faulty, &rdev->flags))
 		return;
 
-	if (mddev->external)
-		set_bit(Blocked, &rdev->flags);
-/*
-	dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
-		mdname(mddev),
-		MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
-		__builtin_return_address(0),__builtin_return_address(1),
-		__builtin_return_address(2),__builtin_return_address(3));
-*/
-	if (!mddev->pers)
-		return;
-	if (!mddev->pers->error_handler)
+	if (!mddev->pers || !mddev->pers->error_handler)
 		return;
 	mddev->pers->error_handler(mddev,rdev);
 	if (mddev->degraded)
@@ -6933,11 +7155,14 @@
 			atomic_add(sectors, &mddev->recovery_active);
 		}
 
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			break;
+
 		j += sectors;
 		if (j>1) mddev->curr_resync = j;
 		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
-			/* this is the earliers that rebuilt will be
+			/* this is the earliest that rebuild will be
 			 * visible in /proc/mdstat
 			 */
 			md_new_event(mddev);
@@ -6946,10 +7171,6 @@
 			continue;
 
 		last_check = io_sectors;
-
-		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
-			break;
-
 	repeat:
 		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
 			/* step marks */
@@ -7067,29 +7288,23 @@
 		    atomic_read(&rdev->nr_pending)==0) {
 			if (mddev->pers->hot_remove_disk(
 				    mddev, rdev->raid_disk)==0) {
-				char nm[20];
-				sprintf(nm,"rd%d", rdev->raid_disk);
-				sysfs_remove_link(&mddev->kobj, nm);
+				sysfs_unlink_rdev(mddev, rdev);
 				rdev->raid_disk = -1;
 			}
 		}
 
-	if (mddev->degraded && !mddev->recovery_disabled) {
+	if (mddev->degraded) {
 		list_for_each_entry(rdev, &mddev->disks, same_set) {
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(In_sync, &rdev->flags) &&
-			    !test_bit(Faulty, &rdev->flags) &&
-			    !test_bit(Blocked, &rdev->flags))
+			    !test_bit(Faulty, &rdev->flags))
 				spares++;
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
 				rdev->recovery_offset = 0;
 				if (mddev->pers->
 				    hot_add_disk(mddev, rdev) == 0) {
-					char nm[20];
-					sprintf(nm, "rd%d", rdev->raid_disk);
-					if (sysfs_create_link(&mddev->kobj,
-							      &rdev->kobj, nm))
+					if (sysfs_link_rdev(mddev, rdev))
 						/* failure here is OK */;
 					spares++;
 					md_new_event(mddev);
@@ -7138,6 +7353,8 @@
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	sysfs_notify_dirent_safe(mddev->sysfs_action);
 	md_new_event(mddev);
+	if (mddev->event_work.func)
+		queue_work(md_misc_wq, &mddev->event_work);
 }
 
 /*
@@ -7170,9 +7387,6 @@
 	if (mddev->bitmap)
 		bitmap_daemon_work(mddev);
 
-	if (mddev->ro)
-		return;
-
 	if (signal_pending(current)) {
 		if (mddev->pers->sync_request && !mddev->external) {
 			printk(KERN_INFO "md: %s in immediate safe mode\n",
@@ -7209,9 +7423,7 @@
 				    atomic_read(&rdev->nr_pending)==0) {
 					if (mddev->pers->hot_remove_disk(
 						    mddev, rdev->raid_disk)==0) {
-						char nm[20];
-						sprintf(nm,"rd%d", rdev->raid_disk);
-						sysfs_remove_link(&mddev->kobj, nm);
+						sysfs_unlink_rdev(mddev, rdev);
 						rdev->raid_disk = -1;
 					}
 				}
@@ -7331,12 +7543,499 @@
 {
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
 	wait_event_timeout(rdev->blocked_wait,
-			   !test_bit(Blocked, &rdev->flags),
+			   !test_bit(Blocked, &rdev->flags) &&
+			   !test_bit(BlockedBadBlocks, &rdev->flags),
 			   msecs_to_jiffies(5000));
 	rdev_dec_pending(rdev, mddev);
 }
 EXPORT_SYMBOL(md_wait_for_blocked_rdev);
 
+
+/* Bad block management.
+ * We can record which blocks on each device are 'bad' and so just
+ * fail those blocks, or that stripe, rather than the whole device.
+ * Entries in the bad-block table are 64bits wide.  This comprises:
+ * Length of bad-range, in sectors: 0-511 for lengths 1-512
+ * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
+ *  A 'shift' can be set so that larger blocks are tracked and
+ *  consequently larger devices can be covered.
+ * 'Acknowledged' flag - 1 bit. - the most significant bit.
+ *
+ * Locking of the bad-block table uses a seqlock so md_is_badblock
+ * might need to retry if it is very unlucky.
+ * We will sometimes want to check for bad blocks in a bi_end_io function,
+ * so we use the write_seqlock_irq variant.
+ *
+ * When looking for a bad block we specify a range and want to
+ * know if any block in the range is bad.  So we binary-search
+ * to the last range that starts at-or-before the given endpoint,
+ * (or "before the sector after the target range")
+ * then see if it ends after the given start.
+ * We return
+ *  0 if there are no known bad blocks in the range
+ *  1 if there are known bad blocks which are all acknowledged
+ * -1 if there are bad blocks which have not yet been acknowledged in metadata.
+ * plus the start/length of the first bad section we overlap.
+ */
+int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+		   sector_t *first_bad, int *bad_sectors)
+{
+	int hi;
+	int lo = 0;
+	u64 *p = bb->page;
+	int rv = 0;
+	sector_t target = s + sectors;
+	unsigned seq;
+
+	if (bb->shift > 0) {
+		/* round the start down, and the end up */
+		s >>= bb->shift;
+		target += (1<<bb->shift) - 1;
+		target >>= bb->shift;
+		sectors = target - s;
+	}
+	/* 'target' is now the first block after the bad range */
+
+retry:
+	seq = read_seqbegin(&bb->lock);
+
+	hi = bb->count;
+
+	/* Binary search between lo and hi for 'target'
+	 * i.e. for the last range that starts before 'target'
+	 */
+	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
+	 * are known not to be the last range before target.
+	 * VARIANT: hi-lo is the number of possible
+	 * ranges, and decreases until it reaches 1
+	 */
+	while (hi - lo > 1) {
+		int mid = (lo + hi) / 2;
+		sector_t a = BB_OFFSET(p[mid]);
+		if (a < target)
+			/* This could still be the one, earlier ranges
+			 * could not. */
+			lo = mid;
+		else
+			/* This and later ranges are definitely out. */
+			hi = mid;
+	}
+	/* 'lo' might be the last that started before target, but 'hi' isn't */
+	if (hi > lo) {
+		/* need to check all ranges that end after 's' to see if
+		 * any are unacknowledged.
+		 */
+		while (lo >= 0 &&
+		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+			if (BB_OFFSET(p[lo]) < target) {
+				/* starts before the end, and finishes after
+				 * the start, so they must overlap
+				 */
+				if (rv != -1 && BB_ACK(p[lo]))
+					rv = 1;
+				else
+					rv = -1;
+				*first_bad = BB_OFFSET(p[lo]);
+				*bad_sectors = BB_LEN(p[lo]);
+			}
+			lo--;
+		}
+	}
+
+	if (read_seqretry(&bb->lock, seq))
+		goto retry;
+
+	return rv;
+}
+EXPORT_SYMBOL_GPL(md_is_badblock);
+
+/*
+ * Add a range of bad blocks to the table.
+ * This might extend the table, or might contract it
+ * if two adjacent ranges can be merged.
+ * We binary-search to find the 'insertion' point, then
+ * decide how best to handle it.
+ */
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+			    int acknowledged)
+{
+	u64 *p;
+	int lo, hi;
+	int rv = 1;
+
+	if (bb->shift < 0)
+		/* badblocks are disabled */
+		return 0;
+
+	if (bb->shift) {
+		/* round the start down, and the end up */
+		sector_t next = s + sectors;
+		s >>= bb->shift;
+		next += (1<<bb->shift) - 1;
+		next >>= bb->shift;
+		sectors = next - s;
+	}
+
+	write_seqlock_irq(&bb->lock);
+
+	p = bb->page;
+	lo = 0;
+	hi = bb->count;
+	/* Find the last range that starts at-or-before 's' */
+	while (hi - lo > 1) {
+		int mid = (lo + hi) / 2;
+		sector_t a = BB_OFFSET(p[mid]);
+		if (a <= s)
+			lo = mid;
+		else
+			hi = mid;
+	}
+	if (hi > lo && BB_OFFSET(p[lo]) > s)
+		hi = lo;
+
+	if (hi > lo) {
+		/* we found a range that might merge with the start
+		 * of our new range
+		 */
+		sector_t a = BB_OFFSET(p[lo]);
+		sector_t e = a + BB_LEN(p[lo]);
+		int ack = BB_ACK(p[lo]);
+		if (e >= s) {
+			/* Yes, we can merge with a previous range */
+			if (s == a && s + sectors >= e)
+				/* new range covers old */
+				ack = acknowledged;
+			else
+				ack = ack && acknowledged;
+
+			if (e < s + sectors)
+				e = s + sectors;
+			if (e - a <= BB_MAX_LEN) {
+				p[lo] = BB_MAKE(a, e-a, ack);
+				s = e;
+			} else {
+				/* does not all fit in one range,
+				 * make p[lo] maximal
+				 */
+				if (BB_LEN(p[lo]) != BB_MAX_LEN)
+					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
+				s = a + BB_MAX_LEN;
+			}
+			sectors = e - s;
+		}
+	}
+	if (sectors && hi < bb->count) {
+		/* 'hi' points to the first range that starts after 's'.
+		 * Maybe we can merge with the start of that range */
+		sector_t a = BB_OFFSET(p[hi]);
+		sector_t e = a + BB_LEN(p[hi]);
+		int ack = BB_ACK(p[hi]);
+		if (a <= s + sectors) {
+			/* merging is possible */
+			if (e <= s + sectors) {
+				/* full overlap */
+				e = s + sectors;
+				ack = acknowledged;
+			} else
+				ack = ack && acknowledged;
+
+			a = s;
+			if (e - a <= BB_MAX_LEN) {
+				p[hi] = BB_MAKE(a, e-a, ack);
+				s = e;
+			} else {
+				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
+				s = a + BB_MAX_LEN;
+			}
+			sectors = e - s;
+			lo = hi;
+			hi++;
+		}
+	}
+	if (sectors == 0 && hi < bb->count) {
+		/* we might be able to combine lo and hi */
+		/* Note: 's' is at the end of 'lo' */
+		sector_t a = BB_OFFSET(p[hi]);
+		int lolen = BB_LEN(p[lo]);
+		int hilen = BB_LEN(p[hi]);
+		int newlen = lolen + hilen - (s - a);
+		if (s >= a && newlen < BB_MAX_LEN) {
+			/* yes, we can combine them */
+			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
+			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
+			memmove(p + hi, p + hi + 1,
+				(bb->count - hi - 1) * 8);
+			bb->count--;
+		}
+	}
+	while (sectors) {
+		/* didn't merge (all of it).
+		 * Need to add a range just before 'hi' */
+		if (bb->count >= MD_MAX_BADBLOCKS) {
+			/* No room for more */
+			rv = 0;
+			break;
+		} else {
+			int this_sectors = sectors;
+			memmove(p + hi + 1, p + hi,
+				(bb->count - hi) * 8);
+			bb->count++;
+
+			if (this_sectors > BB_MAX_LEN)
+				this_sectors = BB_MAX_LEN;
+			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
+			sectors -= this_sectors;
+			s += this_sectors;
+		}
+	}
+
+	bb->changed = 1;
+	if (!acknowledged)
+		bb->unacked_exist = 1;
+	write_sequnlock_irq(&bb->lock);
+
+	return rv;
+}
+
+int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors,
+		       int acknowledged)
+{
+	int rv = md_set_badblocks(&rdev->badblocks,
+				  s + rdev->data_offset, sectors, acknowledged);
+	if (rv) {
+		/* Make sure they get written out promptly */
+		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
+		md_wakeup_thread(rdev->mddev->thread);
+	}
+	return rv;
+}
+EXPORT_SYMBOL_GPL(rdev_set_badblocks);
+
+/*
+ * Remove a range of bad blocks from the table.
+ * This may involve extending the table if we split a region,
+ * but it must not fail.  So if the table becomes full, we just
+ * drop the remove request.
+ */
+static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
+{
+	u64 *p;
+	int lo, hi;
+	sector_t target = s + sectors;
+	int rv = 0;
+
+	if (bb->shift > 0) {
+		/* When clearing we round the start up and the end down.
+		 * This should not matter as the shift should align with
+		 * the block size and no rounding should ever be needed.
+		 * However it is better to think a block is bad when it
+		 * isn't than to think a block is not bad when it is.
+		 */
+		s += (1<<bb->shift) - 1;
+		s >>= bb->shift;
+		target >>= bb->shift;
+		sectors = target - s;
+	}
+
+	write_seqlock_irq(&bb->lock);
+
+	p = bb->page;
+	lo = 0;
+	hi = bb->count;
+	/* Find the last range that starts before 'target' */
+	while (hi - lo > 1) {
+		int mid = (lo + hi) / 2;
+		sector_t a = BB_OFFSET(p[mid]);
+		if (a < target)
+			lo = mid;
+		else
+			hi = mid;
+	}
+	if (hi > lo) {
+		/* p[lo] is the last range that could overlap the
+		 * current range.  Earlier ranges could also overlap,
+		 * but only this one can overlap the end of the range.
+		 */
+		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+			/* Partial overlap, leave the tail of this range */
+			int ack = BB_ACK(p[lo]);
+			sector_t a = BB_OFFSET(p[lo]);
+			sector_t end = a + BB_LEN(p[lo]);
+
+			if (a < s) {
+				/* we need to split this range */
+				if (bb->count >= MD_MAX_BADBLOCKS) {
+					rv = 0;
+					goto out;
+				}
+				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
+				bb->count++;
+				p[lo] = BB_MAKE(a, s-a, ack);
+				lo++;
+			}
+			p[lo] = BB_MAKE(target, end - target, ack);
+			/* there is no longer an overlap */
+			hi = lo;
+			lo--;
+		}
+		while (lo >= 0 &&
+		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+			/* This range does overlap */
+			if (BB_OFFSET(p[lo]) < s) {
+				/* Keep the early parts of this range. */
+				int ack = BB_ACK(p[lo]);
+				sector_t start = BB_OFFSET(p[lo]);
+				p[lo] = BB_MAKE(start, s - start, ack);
+				/* now 'lo' doesn't overlap, so we can stop */
+				break;
+			}
+			lo--;
+		}
+		/* 'lo' is strictly before, 'hi' is strictly after,
+		 * anything between needs to be discarded
+		 */
+		if (hi - lo > 1) {
+			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
+			bb->count -= (hi - lo - 1);
+		}
+	}
+
+	bb->changed = 1;
+out:
+	write_sequnlock_irq(&bb->lock);
+	return rv;
+}
+
+int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors)
+{
+	return md_clear_badblocks(&rdev->badblocks,
+				  s + rdev->data_offset,
+				  sectors);
+}
+EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
+
+/*
+ * Acknowledge all bad blocks in a list.
+ * This only succeeds if ->changed is clear.  It is used by
+ * in-kernel metadata updates
+ */
+void md_ack_all_badblocks(struct badblocks *bb)
+{
+	if (bb->page == NULL || bb->changed)
+		/* no point even trying */
+		return;
+	write_seqlock_irq(&bb->lock);
+
+	if (bb->changed == 0) {
+		u64 *p = bb->page;
+		int i;
+		for (i = 0; i < bb->count ; i++) {
+			if (!BB_ACK(p[i])) {
+				sector_t start = BB_OFFSET(p[i]);
+				int len = BB_LEN(p[i]);
+				p[i] = BB_MAKE(start, len, 1);
+			}
+		}
+		bb->unacked_exist = 0;
+	}
+	write_sequnlock_irq(&bb->lock);
+}
+EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
+
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad_blocks' lists sector numbers and lengths of ranges that
+ *    are recorded as bad.  The list is truncated to fit within
+ *    the one-page limit of sysfs.
+ *    Writing "sector length" to this file adds an acknowledged
+ *    bad block list.
+ * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
+ *    been acknowledged.  Writing to this file adds bad blocks
+ *    without acknowledging them.  This is largely for testing.
+ */
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack)
+{
+	size_t len;
+	int i;
+	u64 *p = bb->page;
+	unsigned seq;
+
+	if (bb->shift < 0)
+		return 0;
+
+retry:
+	seq = read_seqbegin(&bb->lock);
+
+	len = 0;
+	i = 0;
+
+	while (len < PAGE_SIZE && i < bb->count) {
+		sector_t s = BB_OFFSET(p[i]);
+		unsigned int length = BB_LEN(p[i]);
+		int ack = BB_ACK(p[i]);
+		i++;
+
+		if (unack && ack)
+			continue;
+
+		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
+				(unsigned long long)s << bb->shift,
+				length << bb->shift);
+	}
+	if (unack && len == 0)
+		bb->unacked_exist = 0;
+
+	if (read_seqretry(&bb->lock, seq))
+		goto retry;
+
+	return len;
+}
+
+#define DO_DEBUG 1
+
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
+{
+	unsigned long long sector;
+	int length;
+	char newline;
+#ifdef DO_DEBUG
+	/* Allow clearing via sysfs *only* for testing/debugging.
+	 * Normally only a successful write may clear a badblock
+	 */
+	int clear = 0;
+	if (page[0] == '-') {
+		clear = 1;
+		page++;
+	}
+#endif /* DO_DEBUG */
+
+	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
+	case 3:
+		if (newline != '\n')
+			return -EINVAL;
+	case 2:
+		if (length <= 0)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+#ifdef DO_DEBUG
+	if (clear) {
+		md_clear_badblocks(bb, sector, length);
+		return len;
+	}
+#endif /* DO_DEBUG */
+	if (md_set_badblocks(bb, sector, length, !unack))
+		return len;
+	else
+		return -ENOSPC;
+}
+
 static int md_notify_reboot(struct notifier_block *this,
 			    unsigned long code, void *x)
 {
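
md_is_badblock() above returns a tri-state (0 = no known bad blocks,
1 = bad but all acknowledged, -1 = unacknowledged bad blocks) plus the
first overlapping range.  A minimal caller sketch, not part of the
patch; the surrounding names are hypothetical:

	sector_t first_bad;
	int bad_sectors;

	switch (md_is_badblock(&rdev->badblocks, sector, nr_sectors,
			       &first_bad, &bad_sectors)) {
	case 0:		/* range is clean: submit it whole */
		break;
	case 1:		/* acknowledged bad blocks: read around
			 * [first_bad, first_bad + bad_sectors) or fail
			 * just that stripe */
		break;
	case -1:	/* unacknowledged: a writer should block, as
			 * md_wait_for_blocked_rdev() above now does */
		break;
	}
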
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1c26c7a..1e586bb 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -29,6 +29,13 @@
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
+/* Bad block numbers are stored sorted in a single page.
+ * 64 bits are used for each block or extent.
+ * 54 bits are sector number, 9 bits are extent size,
+ * 1 bit is an 'acknowledged' flag.
+ */
+#define MD_MAX_BADBLOCKS	(PAGE_SIZE/8)
+
 /*
  * MD's 'extended' device
  */
@@ -48,7 +55,7 @@
 	struct block_device *meta_bdev;
 	struct block_device *bdev;	/* block device handle */
 
-	struct page	*sb_page;
+	struct page	*sb_page, *bb_page;
 	int		sb_loaded;
 	__u64		sb_events;
 	sector_t	data_offset;	/* start of data in array */
@@ -74,9 +81,29 @@
 #define	In_sync		2		/* device is in_sync with rest of array */
 #define	WriteMostly	4		/* Avoid reading if at all possible */
 #define	AutoDetected	7		/* added by auto-detect */
-#define Blocked		8		/* An error occurred on an externally
-					 * managed array, don't allow writes
+#define Blocked		8		/* An error occurred but has not yet
+					 * been acknowledged by the metadata
+					 * handler, so don't allow writes
 					 * until it is cleared */
+#define WriteErrorSeen	9		/* A write error has been seen on this
+					 * device
+					 */
+#define FaultRecorded	10		/* Intermediate state for clearing
+					 * Blocked.  The Fault is/will-be
+					 * recorded in the metadata, but that
+					 * metadata hasn't been stored safely
+					 * on disk yet.
+					 */
+#define BlockedBadBlocks 11		/* A writer is blocked because it
+					 * found an unacknowledged bad-block.
+					 * This can safely be cleared at any
+					 * time, and the writer will re-check.
+					 * It may be set at any time, and at
+					 * worst the writer will timeout and
+					 * re-check.  So setting it as
+					 * accurately as possible is good, but
+					 * not absolutely critical.
+					 */
 	wait_queue_head_t blocked_wait;
 
 	int desc_nr;			/* descriptor index in the superblock */
@@ -111,8 +138,54 @@
 
 	struct sysfs_dirent *sysfs_state; /* handle for 'state'
 					   * sysfs entry */
+
+	struct badblocks {
+		int	count;		/* count of bad blocks */
+		int	unacked_exist;	/* there probably are unacknowledged
+					 * bad blocks.  This is only cleared
+					 * when a read discovers none
+					 */
+		int	shift;		/* shift from sectors to block size
+					 * a -ve shift means badblocks are
+					 * disabled. */
+		u64	*page;		/* badblock list */
+		int	changed;
+		seqlock_t lock;
+
+		sector_t sector;
+		sector_t size;		/* in sectors */
+	} badblocks;
 };
 
+#define BB_LEN_MASK	(0x00000000000001FFULL)
+#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
+#define BB_ACK_MASK	(0x8000000000000000ULL)
+#define BB_MAX_LEN	512
+#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
+#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
+#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
+#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
+
+extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+			  sector_t *first_bad, int *bad_sectors);
+static inline int is_badblock(mdk_rdev_t *rdev, sector_t s, int sectors,
+			      sector_t *first_bad, int *bad_sectors)
+{
+	if (unlikely(rdev->badblocks.count)) {
+		int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
+					sectors,
+					first_bad, bad_sectors);
+		if (rv)
+			*first_bad -= rdev->data_offset;
+		return rv;
+	}
+	return 0;
+}
+extern int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors,
+			      int acknowledged);
+extern int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors);
+extern void md_ack_all_badblocks(struct badblocks *bb);
+
 struct mddev_s
 {
 	void				*private;
@@ -239,9 +312,12 @@
 #define	MD_RECOVERY_FROZEN	9
 
 	unsigned long			recovery;
-	int				recovery_disabled; /* if we detect that recovery
-							    * will always fail, set this
-							    * so we don't loop trying */
+	/* If a RAID personality determines that recovery (of a particular
+	 * device) will fail due to a read error on the source device, it
+	 * takes a copy of this number and does not attempt recovery again
+	 * until this number changes.
+	 */
+	int				recovery_disabled;
 
 	int				in_sync;	/* know to not need resync */
 	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
@@ -304,11 +380,6 @@
 							 * hot-adding a bitmap.  It should
 							 * eventually be settable by sysfs.
 							 */
-		/* When md is serving under dm, it might use a
-		 * dirty_log to store the bits.
-		 */
-		struct dm_dirty_log *log;
-
 		struct mutex		mutex;
 		unsigned long		chunksize;
 		unsigned long		daemon_sleep; /* how many jiffies between updates? */
@@ -413,6 +484,20 @@
 	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
 }
 
+static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	char nm[20];
+	sprintf(nm, "rd%d", rdev->raid_disk);
+	return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+}
+
+static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	char nm[20];
+	sprintf(nm, "rd%d", rdev->raid_disk);
+	sysfs_remove_link(&mddev->kobj, nm);
+}
+
 /*
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
@@ -505,7 +590,7 @@
 extern int md_run(mddev_t *mddev);
 extern void md_stop(mddev_t *mddev);
 extern void md_stop_writes(mddev_t *mddev);
-extern void md_rdev_init(mdk_rdev_t *rdev);
+extern int md_rdev_init(mdk_rdev_t *rdev);
 
 extern void mddev_suspend(mddev_t *mddev);
 extern void mddev_resume(mddev_t *mddev);
@@ -514,4 +599,5 @@
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   mddev_t *mddev);
 extern int mddev_check_plugged(mddev_t *mddev);
+extern void md_trim_bio(struct bio *bio, int offset, int size);
 #endif /* _MD_MD_H */
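
The BB_* macros added to md.h above pack one bad range per 64-bit word.
A minimal round-trip, not part of the patch, with hypothetical numbers:

	u64 e = BB_MAKE(1000, 8, 1);	/* start 1000, 8 sectors, acked */

	/* BB_OFFSET(e) == 1000 : start, bits 9..62
	 * BB_LEN(e)    == 8    : bits 0..8 hold length - 1, so 1..512
	 * BB_ACK(e)    == 1    : the acknowledged flag, bit 63 */
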
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f7431b6..32323f0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -35,16 +35,13 @@
 #include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/seq_file.h>
+#include <linux/ratelimit.h>
 #include "md.h"
 #include "raid1.h"
 #include "bitmap.h"
 
 #define DEBUG 0
-#if DEBUG
-#define PRINTK(x...) printk(x)
-#else
-#define PRINTK(x...)
-#endif
+#define PRINTK(x...) do { if (DEBUG) printk(x); } while (0)
 
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
@@ -166,7 +163,7 @@
 
 	for (i = 0; i < conf->raid_disks; i++) {
 		struct bio **bio = r1_bio->bios + i;
-		if (*bio && *bio != IO_BLOCKED)
+		if (!BIO_SPECIAL(*bio))
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -176,12 +173,6 @@
 {
 	conf_t *conf = r1_bio->mddev->private;
 
-	/*
-	 * Wake up any possible resync thread that waits for the device
-	 * to go idle.
-	 */
-	allow_barrier(conf);
-
 	put_all_bios(conf, r1_bio);
 	mempool_free(r1_bio, conf->r1bio_pool);
 }
@@ -222,6 +213,33 @@
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
+static void call_bio_endio(r1bio_t *r1_bio)
+{
+	struct bio *bio = r1_bio->master_bio;
+	int done;
+	conf_t *conf = r1_bio->mddev->private;
+
+	if (bio->bi_phys_segments) {
+		unsigned long flags;
+		spin_lock_irqsave(&conf->device_lock, flags);
+		bio->bi_phys_segments--;
+		done = (bio->bi_phys_segments == 0);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	} else
+		done = 1;
+
+	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	if (done) {
+		bio_endio(bio, 0);
+		/*
+		 * Wake up any possible resync thread that waits for the device
+		 * to go idle.
+		 */
+		allow_barrier(conf);
+	}
+}
+
 static void raid_end_bio_io(r1bio_t *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
@@ -234,8 +252,7 @@
 			(unsigned long long) bio->bi_sector +
 				(bio->bi_size >> 9) - 1);
 
-		bio_endio(bio,
-			test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
+		call_bio_endio(r1_bio);
 	}
 	free_r1bio(r1_bio);
 }
@@ -287,36 +304,52 @@
 		 * oops, read error:
 		 */
 		char b[BDEVNAME_SIZE];
-		if (printk_ratelimit())
-			printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n",
-			       mdname(conf->mddev),
-			       bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
+		printk_ratelimited(
+			KERN_ERR "md/raid1:%s: %s: "
+			"rescheduling sector %llu\n",
+			mdname(conf->mddev),
+			bdevname(conf->mirrors[mirror].rdev->bdev,
+				 b),
+			(unsigned long long)r1_bio->sector);
+		set_bit(R1BIO_ReadError, &r1_bio->state);
 		reschedule_retry(r1_bio);
 	}
 
 	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 }
 
+static void close_write(r1bio_t *r1_bio)
+{
+	/* it really is the end of this request */
+	if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+		/* free extra copy of the data pages */
+		int i = r1_bio->behind_page_count;
+		while (i--)
+			safe_put_page(r1_bio->behind_bvecs[i].bv_page);
+		kfree(r1_bio->behind_bvecs);
+		r1_bio->behind_bvecs = NULL;
+	}
+	/* clear the bitmap if all writes complete successfully */
+	bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+			r1_bio->sectors,
+			!test_bit(R1BIO_Degraded, &r1_bio->state),
+			test_bit(R1BIO_BehindIO, &r1_bio->state));
+	md_write_end(r1_bio->mddev);
+}
+
 static void r1_bio_write_done(r1bio_t *r1_bio)
 {
-	if (atomic_dec_and_test(&r1_bio->remaining))
-	{
-		/* it really is the end of this request */
-		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-			/* free extra copy of the data pages */
-			int i = r1_bio->behind_page_count;
-			while (i--)
-				safe_put_page(r1_bio->behind_pages[i]);
-			kfree(r1_bio->behind_pages);
-			r1_bio->behind_pages = NULL;
-		}
-		/* clear the bitmap if all writes complete successfully */
-		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-				r1_bio->sectors,
-				!test_bit(R1BIO_Degraded, &r1_bio->state),
-				test_bit(R1BIO_BehindIO, &r1_bio->state));
-		md_write_end(r1_bio->mddev);
-		raid_end_bio_io(r1_bio);
+	if (!atomic_dec_and_test(&r1_bio->remaining))
+		return;
+
+	if (test_bit(R1BIO_WriteError, &r1_bio->state))
+		reschedule_retry(r1_bio);
+	else {
+		close_write(r1_bio);
+		if (test_bit(R1BIO_MadeGood, &r1_bio->state))
+			reschedule_retry(r1_bio);
+		else
+			raid_end_bio_io(r1_bio);
 	}
 }
 
@@ -336,13 +369,11 @@
 	/*
 	 * 'one mirror IO has finished' event handler:
 	 */
-	r1_bio->bios[mirror] = NULL;
-	to_put = bio;
 	if (!uptodate) {
-		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-		/* an I/O failed, we can't clear the bitmap */
-		set_bit(R1BIO_Degraded, &r1_bio->state);
-	} else
+		set_bit(WriteErrorSeen,
+			&conf->mirrors[mirror].rdev->flags);
+		set_bit(R1BIO_WriteError, &r1_bio->state);
+	} else {
 		/*
 		 * Set R1BIO_Uptodate in our master bio, so that we
 		 * will return a good error code to the higher
@@ -353,8 +384,22 @@
 		 * to user-side. So if something waits for IO, then it
 		 * will wait for the 'master' bio.
 		 */
+		sector_t first_bad;
+		int bad_sectors;
+
+		r1_bio->bios[mirror] = NULL;
+		to_put = bio;
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
+		/* Maybe we can clear some bad blocks. */
+		if (is_badblock(conf->mirrors[mirror].rdev,
+				r1_bio->sector, r1_bio->sectors,
+				&first_bad, &bad_sectors)) {
+			r1_bio->bios[mirror] = IO_MADE_GOOD;
+			set_bit(R1BIO_MadeGood, &r1_bio->state);
+		}
+	}
+
 	update_head_pos(mirror, r1_bio);
 
 	if (behind) {
@@ -377,11 +422,13 @@
 				       (unsigned long long) mbio->bi_sector,
 				       (unsigned long long) mbio->bi_sector +
 				       (mbio->bi_size >> 9) - 1);
-				bio_endio(mbio, 0);
+				call_bio_endio(r1_bio);
 			}
 		}
 	}
-	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+	if (r1_bio->bios[mirror] == NULL)
+		rdev_dec_pending(conf->mirrors[mirror].rdev,
+				 conf->mddev);
 
 	/*
 	 * Let's see if all mirrored write operations have finished
@@ -408,10 +455,11 @@
  *
  * The rdev for the device selected will have nr_pending incremented.
  */
-static int read_balance(conf_t *conf, r1bio_t *r1_bio)
+static int read_balance(conf_t *conf, r1bio_t *r1_bio, int *max_sectors)
 {
 	const sector_t this_sector = r1_bio->sector;
-	const int sectors = r1_bio->sectors;
+	int sectors;
+	int best_good_sectors;
 	int start_disk;
 	int best_disk;
 	int i;
@@ -426,8 +474,11 @@
 	 * We take the first readable disk when above the resync window.
 	 */
  retry:
+	sectors = r1_bio->sectors;
 	best_disk = -1;
 	best_dist = MaxSector;
+	best_good_sectors = 0;
+
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync)) {
 		choose_first = 1;
@@ -439,6 +490,9 @@
 
 	for (i = 0 ; i < conf->raid_disks ; i++) {
 		sector_t dist;
+		sector_t first_bad;
+		int bad_sectors;
+
 		int disk = start_disk + i;
 		if (disk >= conf->raid_disks)
 			disk -= conf->raid_disks;
@@ -461,6 +515,35 @@
 		/* This is a reasonable device to use.  It might
 		 * even be best.
 		 */
+		if (is_badblock(rdev, this_sector, sectors,
+				&first_bad, &bad_sectors)) {
+			if (best_dist < MaxSector)
+				/* already have a better device */
+				continue;
+			if (first_bad <= this_sector) {
+				/* cannot read here. If this is the 'primary'
+				 * device, then we must not read beyond
+				 * bad_sectors from another device..
+				 */
+				bad_sectors -= (this_sector - first_bad);
+				if (choose_first && sectors > bad_sectors)
+					sectors = bad_sectors;
+				if (best_good_sectors > sectors)
+					best_good_sectors = sectors;
+
+			} else {
+				sector_t good_sectors = first_bad - this_sector;
+				if (good_sectors > best_good_sectors) {
+					best_good_sectors = good_sectors;
+					best_disk = disk;
+				}
+				if (choose_first)
+					break;
+			}
+			continue;
+		} else
+			best_good_sectors = sectors;
+
 		dist = abs(this_sector - conf->mirrors[disk].head_position);
 		if (choose_first
 		    /* Don't change to another disk for sequential reads */
@@ -489,10 +572,12 @@
 			rdev_dec_pending(rdev, conf->mddev);
 			goto retry;
 		}
+		sectors = best_good_sectors;
 		conf->next_seq_sect = this_sector + sectors;
 		conf->last_used = best_disk;
 	}
 	rcu_read_unlock();
+	*max_sectors = sectors;
 
 	return best_disk;
 }
@@ -672,30 +757,31 @@
 {
 	int i;
 	struct bio_vec *bvec;
-	struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
+	struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
 					GFP_NOIO);
-	if (unlikely(!pages))
+	if (unlikely(!bvecs))
 		return;
 
 	bio_for_each_segment(bvec, bio, i) {
-		pages[i] = alloc_page(GFP_NOIO);
-		if (unlikely(!pages[i]))
+		bvecs[i] = *bvec;
+		bvecs[i].bv_page = alloc_page(GFP_NOIO);
+		if (unlikely(!bvecs[i].bv_page))
 			goto do_sync_io;
-		memcpy(kmap(pages[i]) + bvec->bv_offset,
-			kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-		kunmap(pages[i]);
+		memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
+		       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
+		kunmap(bvecs[i].bv_page);
 		kunmap(bvec->bv_page);
 	}
-	r1_bio->behind_pages = pages;
+	r1_bio->behind_bvecs = bvecs;
 	r1_bio->behind_page_count = bio->bi_vcnt;
 	set_bit(R1BIO_BehindIO, &r1_bio->state);
 	return;
 
 do_sync_io:
 	for (i = 0; i < bio->bi_vcnt; i++)
-		if (pages[i])
-			put_page(pages[i]);
-	kfree(pages);
+		if (bvecs[i].bv_page)
+			put_page(bvecs[i].bv_page);
+	kfree(bvecs);
 	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
@@ -705,7 +791,7 @@
 	mirror_info_t *mirror;
 	r1bio_t *r1_bio;
 	struct bio *read_bio;
-	int i, targets = 0, disks;
+	int i, disks;
 	struct bitmap *bitmap;
 	unsigned long flags;
 	const int rw = bio_data_dir(bio);
@@ -713,6 +799,9 @@
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	mdk_rdev_t *blocked_rdev;
 	int plugged;
+	int first_clone;
+	int sectors_handled;
+	int max_sectors;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -759,11 +848,24 @@
 	r1_bio->mddev = mddev;
 	r1_bio->sector = bio->bi_sector;
 
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r1_bio and no locking
+	 * will be needed when requests complete.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
 	if (rw == READ) {
 		/*
 		 * read balancing logic:
 		 */
-		int rdisk = read_balance(conf, r1_bio);
+		int rdisk;
+
+read_again:
+		rdisk = read_balance(conf, r1_bio, &max_sectors);
 
 		if (rdisk < 0) {
 			/* couldn't find anywhere to read from */
@@ -784,6 +886,8 @@
 		r1_bio->read_disk = rdisk;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+		md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector,
+			    max_sectors);
 
 		r1_bio->bios[rdisk] = read_bio;
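
md_trim_bio() itself is not part of this diff, so the sketch below only models the contract the call sites rely on -- drop 'offset' sectors from the front of a bio and keep at most 'size' sectors -- using a toy structure rather than the real struct bio. Treat it as an assumption about the helper, not its implementation.

#include <stdio.h>

/* Minimal stand-in for the fields a trim helper touches; the real
 * struct bio is far richer. */
struct toy_bio {
	unsigned long long bi_sector;	/* start, in 512-byte sectors */
	unsigned int bi_size;		/* length, in bytes */
};

/* Assumed contract of md_trim_bio() as used in this patch: drop
 * 'offset' sectors from the front, keep at most 'size' sectors. */
static void toy_trim_bio(struct toy_bio *bio, int offset, int size)
{
	bio->bi_sector += offset;
	bio->bi_size -= offset << 9;
	if (bio->bi_size > (unsigned int)size << 9)
		bio->bi_size = (unsigned int)size << 9;
}

int main(void)
{
	struct toy_bio b = { .bi_sector = 1000, .bi_size = 128 << 9 };

	toy_trim_bio(&b, 16, 64);	/* skip 16 sectors, keep 64 */
	printf("%llu %u\n", b.bi_sector, b.bi_size >> 9); /* 1016 64 */
	return 0;
}
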
 
@@ -793,16 +897,52 @@
 		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r1_bio;
 
-		generic_make_request(read_bio);
+		if (max_sectors < r1_bio->sectors) {
+			/* could not read all from this device, so we will
+			 * need another r1_bio.
+			 */
+
+			sectors_handled = (r1_bio->sector + max_sectors
+					   - bio->bi_sector);
+			r1_bio->sectors = max_sectors;
+			spin_lock_irq(&conf->device_lock);
+			if (bio->bi_phys_segments == 0)
+				bio->bi_phys_segments = 2;
+			else
+				bio->bi_phys_segments++;
+			spin_unlock_irq(&conf->device_lock);
+			/* Cannot call generic_make_request directly
+			 * as that will be queued in __make_request
+			 * and subsequent mempool_alloc might block waiting
+			 * for it.  So hand bio over to raid1d.
+			 */
+			reschedule_retry(r1_bio);
+
+			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+			r1_bio->master_bio = bio;
+			r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+			r1_bio->state = 0;
+			r1_bio->mddev = mddev;
+			r1_bio->sector = bio->bi_sector + sectors_handled;
+			goto read_again;
+		} else
+			generic_make_request(read_bio);
 		return 0;
 	}
 
 	/*
 	 * WRITE:
 	 */
-	/* first select target devices under spinlock and
+	/* first select target devices under rcu_lock and
 	 * inc refcount on their rdev.  Record them by setting
 	 * bios[x] to bio
+	 * If there are known/acknowledged bad blocks on any device on
+	 * which we have seen a write error, we want to avoid writing those
+	 * blocks.
+	 * This potentially requires several writes to write around
+	 * the bad blocks.  Each set of writes gets its own r1bio
+	 * with a set of bios attached.
 	 */
 	plugged = mddev_check_plugged(mddev);
 
@@ -810,6 +950,7 @@
  retry_write:
 	blocked_rdev = NULL;
 	rcu_read_lock();
+	max_sectors = r1_bio->sectors;
 	for (i = 0;  i < disks; i++) {
 		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
@@ -817,17 +958,56 @@
 			blocked_rdev = rdev;
 			break;
 		}
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			atomic_inc(&rdev->nr_pending);
-			if (test_bit(Faulty, &rdev->flags)) {
-				rdev_dec_pending(rdev, mddev);
-				r1_bio->bios[i] = NULL;
-			} else {
-				r1_bio->bios[i] = bio;
-				targets++;
+		r1_bio->bios[i] = NULL;
+		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+			set_bit(R1BIO_Degraded, &r1_bio->state);
+			continue;
+		}
+
+		atomic_inc(&rdev->nr_pending);
+		if (test_bit(WriteErrorSeen, &rdev->flags)) {
+			sector_t first_bad;
+			int bad_sectors;
+			int is_bad;
+
+			is_bad = is_badblock(rdev, r1_bio->sector,
+					     max_sectors,
+					     &first_bad, &bad_sectors);
+			if (is_bad < 0) {
+				/* mustn't write here until the bad block is
+				 * acknowledged */
+				set_bit(BlockedBadBlocks, &rdev->flags);
+				blocked_rdev = rdev;
+				break;
 			}
-		} else
-			r1_bio->bios[i] = NULL;
+			if (is_bad && first_bad <= r1_bio->sector) {
+				/* Cannot write here at all */
+				bad_sectors -= (r1_bio->sector - first_bad);
+				if (bad_sectors < max_sectors)
+					/* mustn't write more than bad_sectors
+					 * to other devices yet
+					 */
+					max_sectors = bad_sectors;
+				rdev_dec_pending(rdev, mddev);
+				/* We don't set R1BIO_Degraded as that
+				 * only applies if the disk is
+				 * missing, so it might be re-added,
+				 * and we want to know to recover this
+				 * chunk.
+				 * In this case the device is here,
+				 * and the fact that this chunk is not
+				 * in-sync is recorded in the bad
+				 * block log
+				 */
+				continue;
+			}
+			if (is_bad) {
+				int good_sectors = first_bad - r1_bio->sector;
+				if (good_sectors < max_sectors)
+					max_sectors = good_sectors;
+			}
+		}
+		r1_bio->bios[i] = bio;
 	}
 	rcu_read_unlock();
 
@@ -838,51 +1018,57 @@
 		for (j = 0; j < i; j++)
 			if (r1_bio->bios[j])
 				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-
+		r1_bio->state = 0;
 		allow_barrier(conf);
 		md_wait_for_blocked_rdev(blocked_rdev, mddev);
 		wait_barrier(conf);
 		goto retry_write;
 	}
 
-	BUG_ON(targets == 0); /* we never fail the last device */
-
-	if (targets < conf->raid_disks) {
-		/* array is degraded, we will not clear the bitmap
-		 * on I/O completion (see raid1_end_write_request) */
-		set_bit(R1BIO_Degraded, &r1_bio->state);
+	if (max_sectors < r1_bio->sectors) {
+		/* We are splitting this write into multiple parts, so
+		 * we need to prepare for allocating another r1_bio.
+		 */
+		r1_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
 	}
-
-	/* do behind I/O ?
-	 * Not if there are too many, or cannot allocate memory,
-	 * or a reader on WriteMostly is waiting for behind writes 
-	 * to flush */
-	if (bitmap &&
-	    (atomic_read(&bitmap->behind_writes)
-	     < mddev->bitmap_info.max_write_behind) &&
-	    !waitqueue_active(&bitmap->behind_wait))
-		alloc_behind_pages(bio, r1_bio);
+	sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector;
 
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
-	bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
-				test_bit(R1BIO_BehindIO, &r1_bio->state));
+	first_clone = 1;
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
 		if (!r1_bio->bios[i])
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		r1_bio->bios[i] = mbio;
+		md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
 
-		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
-		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-		mbio->bi_end_io	= raid1_end_write_request;
-		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
-		mbio->bi_private = r1_bio;
+		if (first_clone) {
+			/* do behind I/O ?
+			 * Not if there are too many, or cannot
+			 * allocate memory, or a reader on WriteMostly
+			 * is waiting for behind writes to flush */
+			if (bitmap &&
+			    (atomic_read(&bitmap->behind_writes)
+			     < mddev->bitmap_info.max_write_behind) &&
+			    !waitqueue_active(&bitmap->behind_wait))
+				alloc_behind_pages(mbio, r1_bio);
 
-		if (r1_bio->behind_pages) {
+			bitmap_startwrite(bitmap, r1_bio->sector,
+					  r1_bio->sectors,
+					  test_bit(R1BIO_BehindIO,
+						   &r1_bio->state));
+			first_clone = 0;
+		}
+		if (r1_bio->behind_bvecs) {
 			struct bio_vec *bvec;
 			int j;
 
@@ -894,11 +1080,20 @@
 			 * them all
 			 */
 			__bio_for_each_segment(bvec, mbio, j, 0)
-				bvec->bv_page = r1_bio->behind_pages[j];
+				bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
 			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
 		}
 
+		r1_bio->bios[i] = mbio;
+
+		mbio->bi_sector	= (r1_bio->sector +
+				   conf->mirrors[i].rdev->data_offset);
+		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+		mbio->bi_end_io	= raid1_end_write_request;
+		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
+		mbio->bi_private = r1_bio;
+
 		atomic_inc(&r1_bio->remaining);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
@@ -909,6 +1104,19 @@
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
+	if (sectors_handled < (bio->bi_size >> 9)) {
+		/* We need another r1_bio.  It has already been counted
+		 * in bio->bi_phys_segments
+		 */
+		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+		r1_bio->state = 0;
+		r1_bio->mddev = mddev;
+		r1_bio->sector = bio->bi_sector + sectors_handled;
+		goto retry_write;
+	}
+
 	if (do_sync || !bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
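
The bi_phys_segments protocol used by both the read and write split paths above can be modelled in a few lines of userspace C. Names are illustrative and the device_lock is elided.

#include <stdio.h>

/* Userspace model of the bi_phys_segments protocol: 0 means "a single
 * r1_bio, no locking needed"; otherwise it is the number of
 * not-yet-completed sub-requests. */
struct toy_master {
	int bi_phys_segments;
};

static void add_split(struct toy_master *m)
{
	if (m->bi_phys_segments == 0)
		m->bi_phys_segments = 2;	/* first split: two parts */
	else
		m->bi_phys_segments++;		/* each further split: +1 */
}

/* Returns 1 when the master bio may be ended. */
static int complete_one(struct toy_master *m)
{
	if (m->bi_phys_segments == 0)
		return 1;			/* was never split */
	return --m->bi_phys_segments == 0;
}

int main(void)
{
	struct toy_master m = { 0 };
	int a, b, c;

	add_split(&m);		/* split into two parts */
	add_split(&m);		/* and again: three parts */
	a = complete_one(&m);
	b = complete_one(&m);
	c = complete_one(&m);
	printf("%d %d %d\n", a, b, c);	/* 0 0 1 */
	return 0;
}
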
 
@@ -952,9 +1160,10 @@
 		 * However don't try a recovery from this drive as
 		 * it is very likely to fail.
 		 */
-		mddev->recovery_disabled = 1;
+		conf->recovery_disabled = mddev->recovery_disabled;
 		return;
 	}
+	set_bit(Blocked, &rdev->flags);
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
 		unsigned long flags;
 		spin_lock_irqsave(&conf->device_lock, flags);
@@ -1027,7 +1236,7 @@
 		    && !test_bit(Faulty, &rdev->flags)
 		    && !test_and_set_bit(In_sync, &rdev->flags)) {
 			count++;
-			sysfs_notify_dirent(rdev->sysfs_state);
+			sysfs_notify_dirent_safe(rdev->sysfs_state);
 		}
 	}
 	spin_lock_irqsave(&conf->device_lock, flags);
@@ -1048,6 +1257,9 @@
 	int first = 0;
 	int last = mddev->raid_disks - 1;
 
+	if (mddev->recovery_disabled == conf->recovery_disabled)
+		return -EBUSY;
+
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
@@ -1103,7 +1315,7 @@
 		 * is not possible.
 		 */
 		if (!test_bit(Faulty, &rdev->flags) &&
-		    !mddev->recovery_disabled &&
+		    mddev->recovery_disabled != conf->recovery_disabled &&
 		    mddev->degraded < conf->raid_disks) {
 			err = -EBUSY;
 			goto abort;
@@ -1155,6 +1367,8 @@
 	conf_t *conf = mddev->private;
 	int i;
 	int mirror=0;
+	sector_t first_bad;
+	int bad_sectors;
 
 	for (i = 0; i < conf->raid_disks; i++)
 		if (r1_bio->bios[i] == bio) {
@@ -1172,18 +1386,48 @@
 			s += sync_blocks;
 			sectors_to_go -= sync_blocks;
 		} while (sectors_to_go > 0);
-		md_error(mddev, conf->mirrors[mirror].rdev);
-	}
+		set_bit(WriteErrorSeen,
+			&conf->mirrors[mirror].rdev->flags);
+		set_bit(R1BIO_WriteError, &r1_bio->state);
+	} else if (is_badblock(conf->mirrors[mirror].rdev,
+			       r1_bio->sector,
+			       r1_bio->sectors,
+			       &first_bad, &bad_sectors) &&
+		   !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
+				r1_bio->sector,
+				r1_bio->sectors,
+				&first_bad, &bad_sectors)
+		)
+		set_bit(R1BIO_MadeGood, &r1_bio->state);
 
 	update_head_pos(mirror, r1_bio);
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		sector_t s = r1_bio->sectors;
-		put_buf(r1_bio);
-		md_done_sync(mddev, s, uptodate);
+		int s = r1_bio->sectors;
+		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+		    test_bit(R1BIO_WriteError, &r1_bio->state))
+			reschedule_retry(r1_bio);
+		else {
+			put_buf(r1_bio);
+			md_done_sync(mddev, s, uptodate);
+		}
 	}
 }
 
+static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+			    int sectors, struct page *page, int rw)
+{
+	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+		/* success */
+		return 1;
+	if (rw == WRITE)
+		set_bit(WriteErrorSeen, &rdev->flags);
+	/* need to record an error - either for the block or the device */
+	if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+		md_error(rdev->mddev, rdev);
+	return 0;
+}
+
 static int fix_sync_read_error(r1bio_t *r1_bio)
 {
 	/* Try some synchronous reads of other devices to get
@@ -1193,6 +1437,9 @@
 	 * We don't need to freeze the array, because being in an
 	 * active sync request, there is no normal IO, and
 	 * no overlapping syncs.
+	 * We don't need to check is_badblock() again as we
+	 * made sure that anything with a bad block in range
+	 * will have bi_end_io clear.
 	 */
 	mddev_t *mddev = r1_bio->mddev;
 	conf_t *conf = mddev->private;
@@ -1217,9 +1464,7 @@
 				 * active, and resync is currently active
 				 */
 				rdev = conf->mirrors[d].rdev;
-				if (sync_page_io(rdev,
-						 sect,
-						 s<<9,
+				if (sync_page_io(rdev, sect, s<<9,
 						 bio->bi_io_vec[idx].bv_page,
 						 READ, false)) {
 					success = 1;
@@ -1233,16 +1478,36 @@
 
 		if (!success) {
 			char b[BDEVNAME_SIZE];
-			/* Cannot read from anywhere, array is toast */
-			md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+			int abort = 0;
+			/* Cannot read from anywhere, this block is lost.
+			 * work, just disable and interrupt the recovery.
+			 * work just disable and interrupt the recovery.
+			 * Don't fail devices as that won't really help.
+			 */
 			printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
 			       " for block %llu\n",
 			       mdname(mddev),
 			       bdevname(bio->bi_bdev, b),
 			       (unsigned long long)r1_bio->sector);
-			md_done_sync(mddev, r1_bio->sectors, 0);
-			put_buf(r1_bio);
-			return 0;
+			for (d = 0; d < conf->raid_disks; d++) {
+				rdev = conf->mirrors[d].rdev;
+				if (!rdev || test_bit(Faulty, &rdev->flags))
+					continue;
+				if (!rdev_set_badblocks(rdev, sect, s, 0))
+					abort = 1;
+			}
+			if (abort) {
+				mddev->recovery_disabled = 1;
+				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+				md_done_sync(mddev, r1_bio->sectors, 0);
+				put_buf(r1_bio);
+				return 0;
+			}
+			/* Try next page */
+			sectors -= s;
+			sect += s;
+			idx++;
+			continue;
 		}
 
 		start = d;
@@ -1254,16 +1519,12 @@
 			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 				continue;
 			rdev = conf->mirrors[d].rdev;
-			if (sync_page_io(rdev,
-					 sect,
-					 s<<9,
-					 bio->bi_io_vec[idx].bv_page,
-					 WRITE, false) == 0) {
+			if (r1_sync_page_io(rdev, sect, s,
+					    bio->bi_io_vec[idx].bv_page,
+					    WRITE) == 0) {
 				r1_bio->bios[d]->bi_end_io = NULL;
 				rdev_dec_pending(rdev, mddev);
-				md_error(mddev, rdev);
-			} else
-				atomic_add(s, &rdev->corrected_errors);
+			}
 		}
 		d = start;
 		while (d != r1_bio->read_disk) {
@@ -1273,12 +1534,10 @@
 			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 				continue;
 			rdev = conf->mirrors[d].rdev;
-			if (sync_page_io(rdev,
-					 sect,
-					 s<<9,
-					 bio->bi_io_vec[idx].bv_page,
-					 READ, false) == 0)
-				md_error(mddev, rdev);
+			if (r1_sync_page_io(rdev, sect, s,
+					    bio->bi_io_vec[idx].bv_page,
+					    READ) != 0)
+				atomic_add(s, &rdev->corrected_errors);
 		}
 		sectors -= s;
 		sect += s;
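
The page-at-a-time stepping in fix_sync_read_error() recurs in the other fix_*_error() helpers as well. A standalone sketch of just the loop (TOY_PAGE_SIZE stands in for the kernel's PAGE_SIZE):

#include <stdio.h>

#define TOY_PAGE_SIZE 4096	/* stands in for the kernel's PAGE_SIZE */

/* Sketch of the chunking loop shared by the fix_*_error() helpers:
 * walk a damaged range one page-sized piece at a time, so a single
 * unreadable page does not doom the whole range. */
int main(void)
{
	unsigned long long sect = 1000;
	int sectors = 24;	/* 24 sectors = 12 KiB to repair */

	while (sectors) {
		int s = sectors;

		if (s > (TOY_PAGE_SIZE >> 9))
			s = TOY_PAGE_SIZE >> 9;	/* at most 8 sectors */
		printf("repair %d sectors at %llu\n", s, sect);
		sectors -= s;
		sect += s;
	}
	return 0;
}
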
@@ -1420,7 +1679,7 @@
  *
  *	1.	Retries failed read operations on working mirrors.
  *	2.	Updates the raid superblock when problems encounter.
- *	3.	Performs writes following reads for array syncronising.
+ *	3.	Performs writes following reads for array synchronising.
  */
 
 static void fix_read_error(conf_t *conf, int read_disk,
@@ -1443,9 +1702,14 @@
 			 * which is the thread that might remove
 			 * a device.  If raid1d ever becomes multi-threaded....
 			 */
+			sector_t first_bad;
+			int bad_sectors;
+
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
+			    is_badblock(rdev, sect, s,
+					&first_bad, &bad_sectors) == 0 &&
 			    sync_page_io(rdev, sect, s<<9,
 					 conf->tmppage, READ, false))
 				success = 1;
@@ -1457,8 +1721,10 @@
 		} while (!success && d != read_disk);
 
 		if (!success) {
-			/* Cannot read from anywhere -- bye bye array */
-			md_error(mddev, conf->mirrors[read_disk].rdev);
+			/* Cannot read from anywhere - mark it bad */
+			mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
+			if (!rdev_set_badblocks(rdev, sect, s, 0))
+				md_error(mddev, rdev);
 			break;
 		}
 		/* write it back and re-read */
@@ -1469,13 +1735,9 @@
 			d--;
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags)) {
-				if (sync_page_io(rdev, sect, s<<9,
-						 conf->tmppage, WRITE, false)
-				    == 0)
-					/* Well, this device is dead */
-					md_error(mddev, rdev);
-			}
+			    test_bit(In_sync, &rdev->flags))
+				r1_sync_page_io(rdev, sect, s,
+						conf->tmppage, WRITE);
 		}
 		d = start;
 		while (d != read_disk) {
@@ -1486,12 +1748,8 @@
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags)) {
-				if (sync_page_io(rdev, sect, s<<9,
-						 conf->tmppage, READ, false)
-				    == 0)
-					/* Well, this device is dead */
-					md_error(mddev, rdev);
-				else {
+				if (r1_sync_page_io(rdev, sect, s,
+						    conf->tmppage, READ)) {
 					atomic_add(s, &rdev->corrected_errors);
 					printk(KERN_INFO
 					       "md/raid1:%s: read error corrected "
@@ -1508,21 +1766,255 @@
 	}
 }
 
+static void bi_complete(struct bio *bio, int error)
+{
+	complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+	struct completion event;
+	rw |= REQ_SYNC;
+
+	init_completion(&event);
+	bio->bi_private = &event;
+	bio->bi_end_io = bi_complete;
+	submit_bio(rw, bio);
+	wait_for_completion(&event);
+
+	return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
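
submit_bio_wait() is a local helper that turns an asynchronous submit into a synchronous one via a completion. The userspace analogue below uses POSIX threads to stand in for the kernel primitives and shows the same submit-then-block shape; build with -pthread.

#include <pthread.h>
#include <stdio.h>

/* Userspace analogue of the completion pattern in submit_bio_wait():
 * the submitter blocks until the completion handler fires from
 * another context. */
struct toy_completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void toy_complete(struct toy_completion *c)	/* ~ bi_complete() */
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void toy_wait(struct toy_completion *c)	/* ~ wait_for_completion() */
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *io_thread(void *arg)	/* ~ the end_io callback context */
{
	toy_complete(arg);
	return NULL;
}

int main(void)
{
	struct toy_completion c = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
	};
	pthread_t t;

	pthread_create(&t, NULL, io_thread, &c);	/* ~ submit_bio() */
	toy_wait(&c);					/* block until done */
	pthread_join(&t, NULL);
	printf("I/O complete\n");
	return 0;
}
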
+
+static int narrow_write_error(r1bio_t *r1_bio, int i)
+{
+	mddev_t *mddev = r1_bio->mddev;
+	conf_t *conf = mddev->private;
+	mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+	int vcnt, idx;
+	struct bio_vec *vec;
+
+	/* bio has the data to be written to device 'i' where
+	 * we just recently had a write error.
+	 * We repeatedly clone the bio and trim down to one block,
+	 * then try the write.  Where the write fails we record
+	 * a bad block.
+	 * It is conceivable that the bio doesn't exactly align with
+	 * blocks.  We must handle this somehow.
+	 *
+	 * We currently own a reference on the rdev.
+	 */
+
+	int block_sectors;
+	sector_t sector;
+	int sectors;
+	int sect_to_write = r1_bio->sectors;
+	int ok = 1;
+
+	if (rdev->badblocks.shift < 0)
+		return 0;
+
+	block_sectors = 1 << rdev->badblocks.shift;
+	sector = r1_bio->sector;
+	sectors = ((sector + block_sectors)
+		   & ~(sector_t)(block_sectors - 1))
+		- sector;
+
+	if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+		vcnt = r1_bio->behind_page_count;
+		vec = r1_bio->behind_bvecs;
+		idx = 0;
+		while (vec[idx].bv_page == NULL)
+			idx++;
+	} else {
+		vcnt = r1_bio->master_bio->bi_vcnt;
+		vec = r1_bio->master_bio->bi_io_vec;
+		idx = r1_bio->master_bio->bi_idx;
+	}
+	while (sect_to_write) {
+		struct bio *wbio;
+		if (sectors > sect_to_write)
+			sectors = sect_to_write;
+		/* Write at 'sector' for 'sectors' */
+
+		wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+		memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+		wbio->bi_sector = r1_bio->sector;
+		wbio->bi_rw = WRITE;
+		wbio->bi_vcnt = vcnt;
+		wbio->bi_size = r1_bio->sectors << 9;
+		wbio->bi_idx = idx;
+
+		md_trim_bio(wbio, sector - r1_bio->sector, sectors);
+		wbio->bi_sector += rdev->data_offset;
+		wbio->bi_bdev = rdev->bdev;
+		if (submit_bio_wait(WRITE, wbio) == 0)
+			/* failure! */
+			ok = rdev_set_badblocks(rdev, sector,
+						sectors, 0)
+				&& ok;
+
+		bio_put(wbio);
+		sect_to_write -= sectors;
+		sector += sectors;
+		sectors = block_sectors;
+	}
+	return ok;
+}
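
The first chunk written by narrow_write_error() is sized so that every subsequent chunk is aligned to the bad-block granularity. Here is the alignment arithmetic in isolation, as a hedged sketch:

#include <stdio.h>

typedef unsigned long long sector_t;

/* Sketch of the alignment step in narrow_write_error(): the first
 * chunk runs from 'sector' up to the next block boundary, so every
 * later chunk is one block-aligned, badblock-sized unit. */
static int first_chunk(sector_t sector, int block_sectors)
{
	return (int)(((sector + block_sectors)
		      & ~(sector_t)(block_sectors - 1)) - sector);
}

int main(void)
{
	/* 8-sector blocks: a write starting at sector 13 first covers
	 * 3 sectors (13..15), then proceeds in aligned 8-sector steps. */
	printf("%d\n", first_chunk(13, 8));	/* 3 */
	printf("%d\n", first_chunk(16, 8));	/* 8: already aligned */
	return 0;
}
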
+
+static void handle_sync_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+	int m;
+	int s = r1_bio->sectors;
+	for (m = 0; m < conf->raid_disks ; m++) {
+		mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+		struct bio *bio = r1_bio->bios[m];
+		if (bio->bi_end_io == NULL)
+			continue;
+		if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+		    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+			rdev_clear_badblocks(rdev, r1_bio->sector, s);
+		}
+		if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+		    test_bit(R1BIO_WriteError, &r1_bio->state)) {
+			if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
+				md_error(conf->mddev, rdev);
+		}
+	}
+	put_buf(r1_bio);
+	md_done_sync(conf->mddev, s, 1);
+}
+
+static void handle_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+	int m;
+	for (m = 0; m < conf->raid_disks ; m++)
+		if (r1_bio->bios[m] == IO_MADE_GOOD) {
+			mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+			rdev_clear_badblocks(rdev,
+					     r1_bio->sector,
+					     r1_bio->sectors);
+			rdev_dec_pending(rdev, conf->mddev);
+		} else if (r1_bio->bios[m] != NULL) {
+			/* This drive got a write error.  We need to
+			 * narrow down and record precise write
+			 * errors.
+			 */
+			if (!narrow_write_error(r1_bio, m)) {
+				md_error(conf->mddev,
+					 conf->mirrors[m].rdev);
+				/* an I/O failed, we can't clear the bitmap */
+				set_bit(R1BIO_Degraded, &r1_bio->state);
+			}
+			rdev_dec_pending(conf->mirrors[m].rdev,
+					 conf->mddev);
+		}
+	if (test_bit(R1BIO_WriteError, &r1_bio->state))
+		close_write(r1_bio);
+	raid_end_bio_io(r1_bio);
+}
+
+static void handle_read_error(conf_t *conf, r1bio_t *r1_bio)
+{
+	int disk;
+	int max_sectors;
+	mddev_t *mddev = conf->mddev;
+	struct bio *bio;
+	char b[BDEVNAME_SIZE];
+	mdk_rdev_t *rdev;
+
+	clear_bit(R1BIO_ReadError, &r1_bio->state);
+	/* we got a read error. Maybe the drive is bad.  Maybe just
+	 * the block and we can fix it.
+	 * We freeze all other IO, and try reading the block from
+	 * other devices.  When we find one, we re-write
+	 * and check whether that fixes the read error.
+	 * This is all done synchronously while the array is
+	 * frozen
+	 */
+	if (mddev->ro == 0) {
+		freeze_array(conf);
+		fix_read_error(conf, r1_bio->read_disk,
+			       r1_bio->sector, r1_bio->sectors);
+		unfreeze_array(conf);
+	} else
+		md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+
+	bio = r1_bio->bios[r1_bio->read_disk];
+	bdevname(bio->bi_bdev, b);
+read_more:
+	disk = read_balance(conf, r1_bio, &max_sectors);
+	if (disk == -1) {
+		printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
+		       " read error for block %llu\n",
+		       mdname(mddev), b, (unsigned long long)r1_bio->sector);
+		raid_end_bio_io(r1_bio);
+	} else {
+		const unsigned long do_sync
+			= r1_bio->master_bio->bi_rw & REQ_SYNC;
+		if (bio) {
+			r1_bio->bios[r1_bio->read_disk] =
+				mddev->ro ? IO_BLOCKED : NULL;
+			bio_put(bio);
+		}
+		r1_bio->read_disk = disk;
+		bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+		md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors);
+		r1_bio->bios[r1_bio->read_disk] = bio;
+		rdev = conf->mirrors[disk].rdev;
+		printk_ratelimited(KERN_ERR
+				   "md/raid1:%s: redirecting sector %llu"
+				   " to other mirror: %s\n",
+				   mdname(mddev),
+				   (unsigned long long)r1_bio->sector,
+				   bdevname(rdev->bdev, b));
+		bio->bi_sector = r1_bio->sector + rdev->data_offset;
+		bio->bi_bdev = rdev->bdev;
+		bio->bi_end_io = raid1_end_read_request;
+		bio->bi_rw = READ | do_sync;
+		bio->bi_private = r1_bio;
+		if (max_sectors < r1_bio->sectors) {
+			/* Drat - have to split this up more */
+			struct bio *mbio = r1_bio->master_bio;
+			int sectors_handled = (r1_bio->sector + max_sectors
+					       - mbio->bi_sector);
+			r1_bio->sectors = max_sectors;
+			spin_lock_irq(&conf->device_lock);
+			if (mbio->bi_phys_segments == 0)
+				mbio->bi_phys_segments = 2;
+			else
+				mbio->bi_phys_segments++;
+			spin_unlock_irq(&conf->device_lock);
+			generic_make_request(bio);
+			bio = NULL;
+
+			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+			r1_bio->master_bio = mbio;
+			r1_bio->sectors = (mbio->bi_size >> 9)
+					  - sectors_handled;
+			r1_bio->state = 0;
+			set_bit(R1BIO_ReadError, &r1_bio->state);
+			r1_bio->mddev = mddev;
+			r1_bio->sector = mbio->bi_sector + sectors_handled;
+
+			goto read_more;
+		} else
+			generic_make_request(bio);
+	}
+}
+
 static void raid1d(mddev_t *mddev)
 {
 	r1bio_t *r1_bio;
-	struct bio *bio;
 	unsigned long flags;
 	conf_t *conf = mddev->private;
 	struct list_head *head = &conf->retry_list;
-	mdk_rdev_t *rdev;
 	struct blk_plug plug;
 
 	md_check_recovery(mddev);
 
 	blk_start_plug(&plug);
 	for (;;) {
-		char b[BDEVNAME_SIZE];
 
 		if (atomic_read(&mddev->plug_cnt) == 0)
 			flush_pending_writes(conf);
@@ -1539,62 +2031,26 @@
 
 		mddev = r1_bio->mddev;
 		conf = mddev->private;
-		if (test_bit(R1BIO_IsSync, &r1_bio->state))
-			sync_request_write(mddev, r1_bio);
-		else {
-			int disk;
-
-			/* we got a read error. Maybe the drive is bad.  Maybe just
-			 * the block and we can fix it.
-			 * We freeze all other IO, and try reading the block from
-			 * other devices.  When we find one, we re-write
-			 * and check it that fixes the read error.
-			 * This is all done synchronously while the array is
-			 * frozen
+		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
+			if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+			    test_bit(R1BIO_WriteError, &r1_bio->state))
+				handle_sync_write_finished(conf, r1_bio);
+			else
+				sync_request_write(mddev, r1_bio);
+		} else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+			   test_bit(R1BIO_WriteError, &r1_bio->state))
+			handle_write_finished(conf, r1_bio);
+		else if (test_bit(R1BIO_ReadError, &r1_bio->state))
+			handle_read_error(conf, r1_bio);
+		else
+			/* just a partial read to be scheduled from separate
+			 * context
 			 */
-			if (mddev->ro == 0) {
-				freeze_array(conf);
-				fix_read_error(conf, r1_bio->read_disk,
-					       r1_bio->sector,
-					       r1_bio->sectors);
-				unfreeze_array(conf);
-			} else
-				md_error(mddev,
-					 conf->mirrors[r1_bio->read_disk].rdev);
+			generic_make_request(r1_bio->bios[r1_bio->read_disk]);
 
-			bio = r1_bio->bios[r1_bio->read_disk];
-			if ((disk=read_balance(conf, r1_bio)) == -1) {
-				printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
-				       " read error for block %llu\n",
-				       mdname(mddev),
-				       bdevname(bio->bi_bdev,b),
-				       (unsigned long long)r1_bio->sector);
-				raid_end_bio_io(r1_bio);
-			} else {
-				const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
-				r1_bio->bios[r1_bio->read_disk] =
-					mddev->ro ? IO_BLOCKED : NULL;
-				r1_bio->read_disk = disk;
-				bio_put(bio);
-				bio = bio_clone_mddev(r1_bio->master_bio,
-						      GFP_NOIO, mddev);
-				r1_bio->bios[r1_bio->read_disk] = bio;
-				rdev = conf->mirrors[disk].rdev;
-				if (printk_ratelimit())
-					printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to"
-					       " other mirror: %s\n",
-					       mdname(mddev),
-					       (unsigned long long)r1_bio->sector,
-					       bdevname(rdev->bdev,b));
-				bio->bi_sector = r1_bio->sector + rdev->data_offset;
-				bio->bi_bdev = rdev->bdev;
-				bio->bi_end_io = raid1_end_read_request;
-				bio->bi_rw = READ | do_sync;
-				bio->bi_private = r1_bio;
-				generic_make_request(bio);
-			}
-		}
 		cond_resched();
+		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+			md_check_recovery(mddev);
 	}
 	blk_finish_plug(&plug);
 }
@@ -1636,6 +2092,8 @@
 	int write_targets = 0, read_targets = 0;
 	sector_t sync_blocks;
 	int still_degraded = 0;
+	int good_sectors = RESYNC_SECTORS;
+	int min_bad = 0; /* number of sectors that are bad in all devices */
 
 	if (!conf->r1buf_pool)
 		if (init_resync(conf))
@@ -1723,36 +2181,89 @@
 
 		rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev == NULL ||
-			   test_bit(Faulty, &rdev->flags)) {
+		    test_bit(Faulty, &rdev->flags)) {
 			still_degraded = 1;
-			continue;
 		} else if (!test_bit(In_sync, &rdev->flags)) {
 			bio->bi_rw = WRITE;
 			bio->bi_end_io = end_sync_write;
 			write_targets ++;
 		} else {
 			/* may need to read from here */
-			bio->bi_rw = READ;
-			bio->bi_end_io = end_sync_read;
-			if (test_bit(WriteMostly, &rdev->flags)) {
-				if (wonly < 0)
-					wonly = i;
-			} else {
-				if (disk < 0)
-					disk = i;
+			sector_t first_bad = MaxSector;
+			int bad_sectors;
+
+			if (is_badblock(rdev, sector_nr, good_sectors,
+					&first_bad, &bad_sectors)) {
+				if (first_bad > sector_nr)
+					good_sectors = first_bad - sector_nr;
+				else {
+					bad_sectors -= (sector_nr - first_bad);
+					if (min_bad == 0 ||
+					    min_bad > bad_sectors)
+						min_bad = bad_sectors;
+				}
 			}
-			read_targets++;
+			if (sector_nr < first_bad) {
+				if (test_bit(WriteMostly, &rdev->flags)) {
+					if (wonly < 0)
+						wonly = i;
+				} else {
+					if (disk < 0)
+						disk = i;
+				}
+				bio->bi_rw = READ;
+				bio->bi_end_io = end_sync_read;
+				read_targets++;
+			}
 		}
-		atomic_inc(&rdev->nr_pending);
-		bio->bi_sector = sector_nr + rdev->data_offset;
-		bio->bi_bdev = rdev->bdev;
-		bio->bi_private = r1_bio;
+		if (bio->bi_end_io) {
+			atomic_inc(&rdev->nr_pending);
+			bio->bi_sector = sector_nr + rdev->data_offset;
+			bio->bi_bdev = rdev->bdev;
+			bio->bi_private = r1_bio;
+		}
 	}
 	rcu_read_unlock();
 	if (disk < 0)
 		disk = wonly;
 	r1_bio->read_disk = disk;
 
+	if (read_targets == 0 && min_bad > 0) {
+		/* These sectors are bad on all InSync devices, so we
+		 * need to mark them bad on all write targets
+		 */
+		addr = r10_bio->devs[0].addr + sect;
+		for (i = 0 ; i < conf->raid_disks ; i++)
+			if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
+				mdk_rdev_t *rdev =
+					rcu_dereference(conf->mirrors[i].rdev);
+				ok = rdev_set_badblocks(rdev, sector_nr,
+							min_bad, 0
+					) && ok;
+			}
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		*skipped = 1;
+		put_buf(r1_bio);
+
+		if (!ok) {
+			/* Cannot record the badblocks, so need to
+			 * abort the resync.
+			 * If there are multiple read targets, could just
+			 * fail the really bad ones ???
+			 */
+			conf->recovery_disabled = mddev->recovery_disabled;
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+			return 0;
+		} else
+			return min_bad;
+
+	}
+	if (min_bad > 0 && min_bad < good_sectors) {
+		/* only resync enough to reach the next bad->good
+		 * transition */
+		good_sectors = min_bad;
+	}
+
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
 		/* extra read targets are also write targets */
 		write_targets += read_targets-1;
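
The good_sectors/min_bad bookkeeping in the hunk above can be checked with a small userspace sketch; the two devices and their bad ranges are invented values.

#include <stdio.h>

typedef unsigned long long sector_t;

/* Sketch of the resync-window trimming in sync_request(): for each
 * in-sync device with a bad range, shrink the window to the good
 * prefix, or track the shortest bad run if the window start is bad. */
int main(void)
{
	sector_t sector_nr = 100;
	int good_sectors = 128;	/* stands in for RESYNC_SECTORS */
	int min_bad = 0;

	/* two invented devices with bad ranges (first_bad, bad_sectors) */
	sector_t first_bad[2] = { 150, 90 };
	int bad_sectors[2] = { 8, 20 };
	int i;

	for (i = 0; i < 2; i++) {
		if (first_bad[i] > sector_nr) {
			if (first_bad[i] - sector_nr < (sector_t)good_sectors)
				good_sectors = first_bad[i] - sector_nr;
		} else {
			int bad = bad_sectors[i]
				- (int)(sector_nr - first_bad[i]);
			if (min_bad == 0 || min_bad > bad)
				min_bad = bad;
		}
	}
	/* only resync up to the next bad->good transition */
	printf("good=%d min_bad=%d\n", good_sectors, min_bad);
	/* prints: good=50 min_bad=10 */
	return 0;
}
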
@@ -1769,6 +2280,8 @@
 
 	if (max_sector > mddev->resync_max)
 		max_sector = mddev->resync_max; /* Don't do IO beyond here */
+	if (max_sector > sector_nr + good_sectors)
+		max_sector = sector_nr + good_sectors;
 	nr_sectors = 0;
 	sync_blocks = 0;
 	do {
@@ -2154,18 +2667,13 @@
 	for (d = d2 = 0; d < conf->raid_disks; d++) {
 		mdk_rdev_t *rdev = conf->mirrors[d].rdev;
 		if (rdev && rdev->raid_disk != d2) {
-			char nm[20];
-			sprintf(nm, "rd%d", rdev->raid_disk);
-			sysfs_remove_link(&mddev->kobj, nm);
+			sysfs_unlink_rdev(mddev, rdev);
 			rdev->raid_disk = d2;
-			sprintf(nm, "rd%d", rdev->raid_disk);
-			sysfs_remove_link(&mddev->kobj, nm);
-			if (sysfs_create_link(&mddev->kobj,
-					      &rdev->kobj, nm))
+			sysfs_unlink_rdev(mddev, rdev);
+			if (sysfs_link_rdev(mddev, rdev))
 				printk(KERN_WARNING
-				       "md/raid1:%s: cannot register "
-				       "%s\n",
-				       mdname(mddev), nm);
+				       "md/raid1:%s: cannot register rd%d\n",
+				       mdname(mddev), rdev->raid_disk);
 		}
 		if (rdev)
 			newmirrors[d2++].rdev = rdev;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index e743a64..e0d676b 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -48,6 +48,12 @@
 					    * (fresh device added).
 					    * Cleared when a sync completes.
 					    */
+	int			recovery_disabled; /* when the same as
+						    * mddev->recovery_disabled
+						    * we don't allow recovery
+						    * to be attempted as we
+						    * expect a read error
+						    */
 
 	wait_queue_head_t	wait_barrier;
 
@@ -95,7 +101,7 @@
 
 	struct list_head	retry_list;
 	/* Next two are only valid when R1BIO_BehindIO is set */
-	struct page		**behind_pages;
+	struct bio_vec		*behind_bvecs;
 	int			behind_page_count;
 	/*
 	 * if the IO is in WRITE direction, then multiple bios are used.
@@ -110,13 +116,24 @@
  * correct the read error.  To keep track of bad blocks on a per-bio
  * level, we store IO_BLOCKED in the appropriate 'bios' pointer
  */
-#define IO_BLOCKED ((struct bio*)1)
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context.  So we record
+ * the success by setting bios[n] to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
 
 /* bits for r1bio.state */
 #define	R1BIO_Uptodate	0
 #define	R1BIO_IsSync	1
 #define	R1BIO_Degraded	2
 #define	R1BIO_BehindIO	3
+/* Set ReadError on bios that experience a read error so that
+ * raid1d knows what to do with them.
+ */
+#define R1BIO_ReadError 4
 /* For write-behind requests, we call bi_end_io when
  * the last non-write-behind device completes, providing
  * any write was successful.  Otherwise we call when
@@ -125,6 +142,11 @@
  * Record that bi_end_io was called with this flag...
  */
 #define	R1BIO_Returned 6
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag
+ */
+#define	R1BIO_MadeGood 7
+#define	R1BIO_WriteError 8
 
 extern int md_raid1_congested(mddev_t *mddev, int bits);
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6e84668..8b29cd4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/seq_file.h>
+#include <linux/ratelimit.h>
 #include "md.h"
 #include "raid10.h"
 #include "raid0.h"
@@ -123,7 +124,14 @@
 	for (j = 0 ; j < nalloc; j++) {
 		bio = r10_bio->devs[j].bio;
 		for (i = 0; i < RESYNC_PAGES; i++) {
-			page = alloc_page(gfp_flags);
+			if (j == 1 && !test_bit(MD_RECOVERY_SYNC,
+						&conf->mddev->recovery)) {
+				/* we can share bv_page's during recovery */
+				struct bio *rbio = r10_bio->devs[0].bio;
+				page = rbio->bi_io_vec[i].bv_page;
+				get_page(page);
+			} else
+				page = alloc_page(gfp_flags);
 			if (unlikely(!page))
 				goto out_free_pages;
 
@@ -173,7 +181,7 @@
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
-		if (*bio && *bio != IO_BLOCKED)
+		if (!BIO_SPECIAL(*bio))
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -183,12 +191,6 @@
 {
 	conf_t *conf = r10_bio->mddev->private;
 
-	/*
-	 * Wake up any possible resync thread that waits for the device
-	 * to go idle.
-	 */
-	allow_barrier(conf);
-
 	put_all_bios(conf, r10_bio);
 	mempool_free(r10_bio, conf->r10bio_pool);
 }
@@ -227,9 +229,27 @@
 static void raid_end_bio_io(r10bio_t *r10_bio)
 {
 	struct bio *bio = r10_bio->master_bio;
+	int done;
+	conf_t *conf = r10_bio->mddev->private;
 
-	bio_endio(bio,
-		test_bit(R10BIO_Uptodate, &r10_bio->state) ? 0 : -EIO);
+	if (bio->bi_phys_segments) {
+		unsigned long flags;
+		spin_lock_irqsave(&conf->device_lock, flags);
+		bio->bi_phys_segments--;
+		done = (bio->bi_phys_segments == 0);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	} else
+		done = 1;
+	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	if (done) {
+		bio_endio(bio, 0);
+		/*
+		 * Wake up any possible resync thread that waits for the device
+		 * to go idle.
+		 */
+		allow_barrier(conf);
+	}
 	free_r10bio(r10_bio);
 }
 
@@ -244,6 +264,26 @@
 		r10_bio->devs[slot].addr + (r10_bio->sectors);
 }
 
+/*
+ * Find the disk number which triggered given bio
+ */
+static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio,
+			 struct bio *bio, int *slotp)
+{
+	int slot;
+
+	for (slot = 0; slot < conf->copies; slot++)
+		if (r10_bio->devs[slot].bio == bio)
+			break;
+
+	BUG_ON(slot == conf->copies);
+	update_head_pos(slot, r10_bio);
+
+	if (slotp)
+		*slotp = slot;
+	return r10_bio->devs[slot].devnum;
+}
+
 static void raid10_end_read_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -277,34 +317,45 @@
 		 * oops, read error - keep the refcount on the rdev
 		 */
 		char b[BDEVNAME_SIZE];
-		if (printk_ratelimit())
-			printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n",
-			       mdname(conf->mddev),
-			       bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
+		printk_ratelimited(KERN_ERR
+				   "md/raid10:%s: %s: rescheduling sector %llu\n",
+				   mdname(conf->mddev),
+				   bdevname(conf->mirrors[dev].rdev->bdev, b),
+				   (unsigned long long)r10_bio->sector);
+		set_bit(R10BIO_ReadError, &r10_bio->state);
 		reschedule_retry(r10_bio);
 	}
 }
 
+static void close_write(r10bio_t *r10_bio)
+{
+	/* clear the bitmap if all writes complete successfully */
+	bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+			r10_bio->sectors,
+			!test_bit(R10BIO_Degraded, &r10_bio->state),
+			0);
+	md_write_end(r10_bio->mddev);
+}
+
 static void raid10_end_write_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t *r10_bio = bio->bi_private;
-	int slot, dev;
+	int dev;
+	int dec_rdev = 1;
 	conf_t *conf = r10_bio->mddev->private;
+	int slot;
 
-	for (slot = 0; slot < conf->copies; slot++)
-		if (r10_bio->devs[slot].bio == bio)
-			break;
-	dev = r10_bio->devs[slot].devnum;
+	dev = find_bio_disk(conf, r10_bio, bio, &slot);
 
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
 	if (!uptodate) {
-		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-		/* an I/O failed, we can't clear the bitmap */
-		set_bit(R10BIO_Degraded, &r10_bio->state);
-	} else
+		set_bit(WriteErrorSeen,	&conf->mirrors[dev].rdev->flags);
+		set_bit(R10BIO_WriteError, &r10_bio->state);
+		dec_rdev = 0;
+	} else {
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code to the higher
@@ -314,9 +365,22 @@
 		 * user-side. So if something waits for IO, then it will
 		 * wait for the 'master' bio.
 		 */
+		sector_t first_bad;
+		int bad_sectors;
+
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 
-	update_head_pos(slot, r10_bio);
+		/* Maybe we can clear some bad blocks. */
+		if (is_badblock(conf->mirrors[dev].rdev,
+				r10_bio->devs[slot].addr,
+				r10_bio->sectors,
+				&first_bad, &bad_sectors)) {
+			bio_put(bio);
+			r10_bio->devs[slot].bio = IO_MADE_GOOD;
+			dec_rdev = 0;
+			set_bit(R10BIO_MadeGood, &r10_bio->state);
+		}
+	}
 
 	/*
 	 *
@@ -324,16 +388,18 @@
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		/* clear the bitmap if all writes complete successfully */
-		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-				r10_bio->sectors,
-				!test_bit(R10BIO_Degraded, &r10_bio->state),
-				0);
-		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
+		if (test_bit(R10BIO_WriteError, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else {
+			close_write(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				raid_end_bio_io(r10_bio);
+		}
 	}
-
-	rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+	if (dec_rdev)
+		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
 
 
@@ -484,11 +550,12 @@
  * FIXME: possibly should rethink readbalancing and do it differently
  * depending on near_copies / far_copies geometry.
  */
-static int read_balance(conf_t *conf, r10bio_t *r10_bio)
+static int read_balance(conf_t *conf, r10bio_t *r10_bio, int *max_sectors)
 {
 	const sector_t this_sector = r10_bio->sector;
 	int disk, slot;
-	const int sectors = r10_bio->sectors;
+	int sectors = r10_bio->sectors;
+	int best_good_sectors;
 	sector_t new_distance, best_dist;
 	mdk_rdev_t *rdev;
 	int do_balance;
@@ -497,8 +564,10 @@
 	raid10_find_phys(conf, r10_bio);
 	rcu_read_lock();
 retry:
+	sectors = r10_bio->sectors;
 	best_slot = -1;
 	best_dist = MaxSector;
+	best_good_sectors = 0;
 	do_balance = 1;
 	/*
 	 * Check if we can balance. We can balance on the whole
@@ -511,6 +580,10 @@
 		do_balance = 0;
 
 	for (slot = 0; slot < conf->copies ; slot++) {
+		sector_t first_bad;
+		int bad_sectors;
+		sector_t dev_sector;
+
 		if (r10_bio->devs[slot].bio == IO_BLOCKED)
 			continue;
 		disk = r10_bio->devs[slot].devnum;
@@ -520,6 +593,37 @@
 		if (!test_bit(In_sync, &rdev->flags))
 			continue;
 
+		dev_sector = r10_bio->devs[slot].addr;
+		if (is_badblock(rdev, dev_sector, sectors,
+				&first_bad, &bad_sectors)) {
+			if (best_dist < MaxSector)
+				/* Already have a better slot */
+				continue;
+			if (first_bad <= dev_sector) {
+				/* Cannot read here.  If this is the
+				 * 'primary' device, then we must not read
+				 * beyond 'bad_sectors' from another device.
+				 */
+				bad_sectors -= (dev_sector - first_bad);
+				if (!do_balance && sectors > bad_sectors)
+					sectors = bad_sectors;
+				if (best_good_sectors > sectors)
+					best_good_sectors = sectors;
+			} else {
+				sector_t good_sectors =
+					first_bad - dev_sector;
+				if (good_sectors > best_good_sectors) {
+					best_good_sectors = good_sectors;
+					best_slot = slot;
+				}
+				if (!do_balance)
+					/* Must read from here */
+					break;
+			}
+			continue;
+		} else
+			best_good_sectors = sectors;
+
 		if (!do_balance)
 			break;
 
@@ -561,6 +665,7 @@
 	} else
 		disk = -1;
 	rcu_read_unlock();
+	*max_sectors = best_good_sectors;
 
 	return disk;
 }
@@ -734,6 +839,8 @@
 	unsigned long flags;
 	mdk_rdev_t *blocked_rdev;
 	int plugged;
+	int sectors_handled;
+	int max_sectors;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
@@ -808,12 +915,26 @@
 	r10_bio->sector = bio->bi_sector;
 	r10_bio->state = 0;
 
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r10_bio and no locking
+	 * will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
 	if (rw == READ) {
 		/*
 		 * read balancing logic:
 		 */
-		int disk = read_balance(conf, r10_bio);
-		int slot = r10_bio->read_slot;
+		int disk;
+		int slot;
+
+read_again:
+		disk = read_balance(conf, r10_bio, &max_sectors);
+		slot = r10_bio->read_slot;
 		if (disk < 0) {
 			raid_end_bio_io(r10_bio);
 			return 0;
@@ -821,6 +942,8 @@
 		mirror = conf->mirrors + disk;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+		md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector,
+			    max_sectors);
 
 		r10_bio->devs[slot].bio = read_bio;
 
@@ -831,7 +954,37 @@
 		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r10_bio;
 
-		generic_make_request(read_bio);
+		if (max_sectors < r10_bio->sectors) {
+			/* Could not read all from this device, so we will
+			 * need another r10_bio.
+			 */
+			sectors_handled = (r10_bio->sector + max_sectors
+					   - bio->bi_sector);
+			r10_bio->sectors = max_sectors;
+			spin_lock_irq(&conf->device_lock);
+			if (bio->bi_phys_segments == 0)
+				bio->bi_phys_segments = 2;
+			else
+				bio->bi_phys_segments++;
+			spin_unlock_irq(&conf->device_lock);
+			/* Cannot call generic_make_request directly
+			 * as that will be queued in __generic_make_request
+			 * and subsequent mempool_alloc might block
+			 * waiting for it.  So hand bio over to raid10d.
+			 */
+			reschedule_retry(r10_bio);
+
+			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+			r10_bio->master_bio = bio;
+			r10_bio->sectors = ((bio->bi_size >> 9)
+					    - sectors_handled);
+			r10_bio->state = 0;
+			r10_bio->mddev = mddev;
+			r10_bio->sector = bio->bi_sector + sectors_handled;
+			goto read_again;
+		} else
+			generic_make_request(read_bio);
 		return 0;
 	}
 
@@ -841,13 +994,22 @@
 	/* first select target devices under rcu_lock and
 	 * inc refcount on their rdev.  Record them by setting
 	 * bios[x] to bio
+	 * If there are known/acknowledged bad blocks on any device
+	 * on which we have seen a write error, we want to avoid
+	 * writing to those blocks.  This potentially requires several
+	 * writes to write around the bad blocks.  Each set of writes
+	 * gets its own r10_bio with a set of bios attached.  The number
+	 * of r10_bios is recorded in bio->bi_phys_segments just as with
+	 * the read case.
 	 */
 	plugged = mddev_check_plugged(mddev);
 
 	raid10_find_phys(conf, r10_bio);
- retry_write:
+retry_write:
 	blocked_rdev = NULL;
 	rcu_read_lock();
+	max_sectors = r10_bio->sectors;
+
 	for (i = 0;  i < conf->copies; i++) {
 		int d = r10_bio->devs[i].devnum;
 		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
@@ -856,13 +1018,55 @@
 			blocked_rdev = rdev;
 			break;
 		}
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			atomic_inc(&rdev->nr_pending);
-			r10_bio->devs[i].bio = bio;
-		} else {
-			r10_bio->devs[i].bio = NULL;
+		r10_bio->devs[i].bio = NULL;
+		if (!rdev || test_bit(Faulty, &rdev->flags)) {
 			set_bit(R10BIO_Degraded, &r10_bio->state);
+			continue;
 		}
+		if (test_bit(WriteErrorSeen, &rdev->flags)) {
+			sector_t first_bad;
+			sector_t dev_sector = r10_bio->devs[i].addr;
+			int bad_sectors;
+			int is_bad;
+
+			is_bad = is_badblock(rdev, dev_sector,
+					     max_sectors,
+					     &first_bad, &bad_sectors);
+			if (is_bad < 0) {
+				/* Mustn't write here until the bad block
+				 * is acknowledged
+				 */
+				atomic_inc(&rdev->nr_pending);
+				set_bit(BlockedBadBlocks, &rdev->flags);
+				blocked_rdev = rdev;
+				break;
+			}
+			if (is_bad && first_bad <= dev_sector) {
+				/* Cannot write here at all */
+				bad_sectors -= (dev_sector - first_bad);
+				if (bad_sectors < max_sectors)
+					/* Mustn't write more than bad_sectors
+					 * to other devices yet
+					 */
+					max_sectors = bad_sectors;
+				/* We don't set R10BIO_Degraded as that
+				 * only applies if the disk is missing,
+				 * so it might be re-added, and we want to
+				 * know to recover this chunk.
+				 * In this case the device is here, and the
+				 * fact that this chunk is not in-sync is
+				 * recorded in the bad block log.
+				 */
+				continue;
+			}
+			if (is_bad) {
+				int good_sectors = first_bad - dev_sector;
+				if (good_sectors < max_sectors)
+					max_sectors = good_sectors;
+			}
+		}
+		r10_bio->devs[i].bio = bio;
+		atomic_inc(&rdev->nr_pending);
 	}
 	rcu_read_unlock();
 
@@ -882,8 +1086,22 @@
 		goto retry_write;
 	}
 
+	if (max_sectors < r10_bio->sectors) {
+		/* We are splitting this into multiple parts, so
+		 * we need to prepare for allocating another r10_bio.
+		 */
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+	}
+	sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+
 	atomic_set(&r10_bio->remaining, 1);
-	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+	bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
@@ -892,10 +1110,12 @@
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+		md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+			    max_sectors);
 		r10_bio->devs[i].bio = mbio;
 
-		mbio->bi_sector	= r10_bio->devs[i].addr+
-			conf->mirrors[d].rdev->data_offset;
+		mbio->bi_sector	= (r10_bio->devs[i].addr+
+				   conf->mirrors[d].rdev->data_offset);
 		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
 		mbio->bi_rw = WRITE | do_sync | do_fua;
@@ -920,6 +1140,21 @@
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
+	if (sectors_handled < (bio->bi_size >> 9)) {
+		/* We need another r10_bio.  It has already been counted
+		 * in bio->bi_phys_segments.
+		 */
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_sector + sectors_handled;
+		r10_bio->state = 0;
+		goto retry_write;
+	}
+
 	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
 	return 0;
@@ -949,6 +1184,30 @@
 	seq_printf(seq, "]");
 }
 
+/* check if there are enough drives for
+ * every block to appear on atleast one.
+ * every block to appear on at least one.
+ * as we might be about to remove it.
+ */
+static int enough(conf_t *conf, int ignore)
+{
+	int first = 0;
+
+	do {
+		int n = conf->copies;
+		int cnt = 0;
+		while (n--) {
+			if (conf->mirrors[first].rdev &&
+			    first != ignore)
+				cnt++;
+			first = (first+1) % conf->raid_disks;
+		}
+		if (cnt == 0)
+			return 0;
+	} while (first != 0);
+	return 1;
+}
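
enough() walks the mirrors in windows of 'copies' consecutive disks; the array survives only if every window still has a member. A userspace rendition with a toy presence array shows how the new 'ignore' argument answers "can we afford to lose this disk?":

#include <stdio.h>

/* Userspace sketch of enough(): with 'copies' replicas laid out
 * round-robin over 'raid_disks', every window of 'copies' consecutive
 * disks must contain at least one present member.  'ignore' models a
 * disk we are about to fail or remove. */
static int toy_enough(const int *present, int raid_disks, int copies,
		      int ignore)
{
	int first = 0;

	do {
		int n = copies, cnt = 0;
		while (n--) {
			if (present[first] && first != ignore)
				cnt++;
			first = (first + 1) % raid_disks;
		}
		if (cnt == 0)
			return 0;
	} while (first != 0);
	return 1;
}

int main(void)
{
	int present[4] = { 1, 0, 1, 1 };	/* disk 1 already gone */

	/* 4 disks, 2 copies: can we also lose disk 0? */
	printf("%d\n", toy_enough(present, 4, 2, 0));	/* 0: no */
	printf("%d\n", toy_enough(present, 4, 2, -1));	/* 1: yes */
	return 0;
}
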
+
 static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	char b[BDEVNAME_SIZE];
@@ -961,13 +1220,9 @@
 	 * else mark the drive as failed
 	 */
 	if (test_bit(In_sync, &rdev->flags)
-	    && conf->raid_disks-mddev->degraded == 1)
+	    && !enough(conf, rdev->raid_disk))
 		/*
 		 * Don't fail the drive, just return an IO error.
-		 * The test should really be more sophisticated than
-		 * "working_disks == 1", but it isn't critical, and
-		 * can wait until we do more sophisticated "is the drive
-		 * really dead" tests...
 		 */
 		return;
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
@@ -980,6 +1235,7 @@
 		 */
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	}
+	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	printk(KERN_ALERT
@@ -1022,27 +1278,6 @@
 	conf->r10buf_pool = NULL;
 }
 
-/* check if there are enough drives for
- * every block to appear on atleast one
- */
-static int enough(conf_t *conf)
-{
-	int first = 0;
-
-	do {
-		int n = conf->copies;
-		int cnt = 0;
-		while (n--) {
-			if (conf->mirrors[first].rdev)
-				cnt++;
-			first = (first+1) % conf->raid_disks;
-		}
-		if (cnt == 0)
-			return 0;
-	} while (first != 0);
-	return 1;
-}
-
 static int raid10_spare_active(mddev_t *mddev)
 {
 	int i;
@@ -1078,7 +1313,6 @@
 	conf_t *conf = mddev->private;
 	int err = -EEXIST;
 	int mirror;
-	mirror_info_t *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
 
@@ -1087,44 +1321,47 @@
 		 * very different from resync
 		 */
 		return -EBUSY;
-	if (!enough(conf))
+	if (!enough(conf, -1))
 		return -EINVAL;
 
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
-	if (rdev->saved_raid_disk >= 0 &&
-	    rdev->saved_raid_disk >= first &&
+	if (rdev->saved_raid_disk >= first &&
 	    conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
 		mirror = rdev->saved_raid_disk;
 	else
 		mirror = first;
-	for ( ; mirror <= last ; mirror++)
-		if ( !(p=conf->mirrors+mirror)->rdev) {
+	for ( ; mirror <= last ; mirror++) {
+		mirror_info_t *p = &conf->mirrors[mirror];
+		if (p->recovery_disabled == mddev->recovery_disabled)
+			continue;
+		if (p->rdev)
+			continue;
 
-			disk_stack_limits(mddev->gendisk, rdev->bdev,
-					  rdev->data_offset << 9);
-			/* as we don't honour merge_bvec_fn, we must
-			 * never risk violating it, so limit
-			 * ->max_segments to one lying with a single
-			 * page, as a one page request is never in
-			 * violation.
-			 */
-			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-				blk_queue_max_segments(mddev->queue, 1);
-				blk_queue_segment_boundary(mddev->queue,
-							   PAGE_CACHE_SIZE - 1);
-			}
-
-			p->head_position = 0;
-			rdev->raid_disk = mirror;
-			err = 0;
-			if (rdev->saved_raid_disk != mirror)
-				conf->fullsync = 1;
-			rcu_assign_pointer(p->rdev, rdev);
-			break;
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
+		/* as we don't honour merge_bvec_fn, we must
+		 * never risk violating it, so limit
+		 * ->max_segments to one lying with a single
+		 * page, as a one page request is never in
+		 * violation.
+		 */
+		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+			blk_queue_max_segments(mddev->queue, 1);
+			blk_queue_segment_boundary(mddev->queue,
+						   PAGE_CACHE_SIZE - 1);
 		}
 
+		p->head_position = 0;
+		rdev->raid_disk = mirror;
+		err = 0;
+		if (rdev->saved_raid_disk != mirror)
+			conf->fullsync = 1;
+		rcu_assign_pointer(p->rdev, rdev);
+		break;
+	}
+
 	md_integrity_add_rdev(rdev, mddev);
 	print_conf(conf);
 	return err;
@@ -1149,7 +1386,8 @@
 		 * is not possible.
 		 */
 		if (!test_bit(Faulty, &rdev->flags) &&
-		    enough(conf)) {
+		    mddev->recovery_disabled != p->recovery_disabled &&
+		    enough(conf, -1)) {
 			err = -EBUSY;
 			goto abort;
 		}
@@ -1174,24 +1412,18 @@
 {
 	r10bio_t *r10_bio = bio->bi_private;
 	conf_t *conf = r10_bio->mddev->private;
-	int i,d;
+	int d;
 
-	for (i=0; i<conf->copies; i++)
-		if (r10_bio->devs[i].bio == bio)
-			break;
-	BUG_ON(i == conf->copies);
-	update_head_pos(i, r10_bio);
-	d = r10_bio->devs[i].devnum;
+	d = find_bio_disk(conf, r10_bio, bio, NULL);
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
-	else {
+	else
+		/* The write handler will notice the lack of
+		 * R10BIO_Uptodate and record any errors etc
+		 */
 		atomic_add(r10_bio->sectors,
 			   &conf->mirrors[d].rdev->corrected_errors);
-		if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
-			md_error(r10_bio->mddev,
-				 conf->mirrors[d].rdev);
-	}
 
 	/* for reconstruct, we always reschedule after a read.
 	 * for resync, only after all reads
@@ -1206,38 +1438,58 @@
 	}
 }
 
+static void end_sync_request(r10bio_t *r10_bio)
+{
+	mddev_t *mddev = r10_bio->mddev;
+
+	while (atomic_dec_and_test(&r10_bio->remaining)) {
+		if (r10_bio->master_bio == NULL) {
+			/* the primary of several recovery bios */
+			sector_t s = r10_bio->sectors;
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+			    test_bit(R10BIO_WriteError, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				put_buf(r10_bio);
+			md_done_sync(mddev, s, 1);
+			break;
+		} else {
+			r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+			    test_bit(R10BIO_WriteError, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				put_buf(r10_bio);
+			r10_bio = r10_bio2;
+		}
+	}
+}
+
 static void end_sync_write(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t *r10_bio = bio->bi_private;
 	mddev_t *mddev = r10_bio->mddev;
 	conf_t *conf = mddev->private;
-	int i,d;
+	int d;
+	sector_t first_bad;
+	int bad_sectors;
+	int slot;
 
-	for (i = 0; i < conf->copies; i++)
-		if (r10_bio->devs[i].bio == bio)
-			break;
-	d = r10_bio->devs[i].devnum;
+	d = find_bio_disk(conf, r10_bio, bio, &slot);
 
-	if (!uptodate)
-		md_error(mddev, conf->mirrors[d].rdev);
-
-	update_head_pos(i, r10_bio);
+	if (!uptodate) {
+		set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
+		set_bit(R10BIO_WriteError, &r10_bio->state);
+	} else if (is_badblock(conf->mirrors[d].rdev,
+			     r10_bio->devs[slot].addr,
+			     r10_bio->sectors,
+			     &first_bad, &bad_sectors))
+		set_bit(R10BIO_MadeGood, &r10_bio->state);
 
 	rdev_dec_pending(conf->mirrors[d].rdev, mddev);
-	while (atomic_dec_and_test(&r10_bio->remaining)) {
-		if (r10_bio->master_bio == NULL) {
-			/* the primary of several recovery bios */
-			sector_t s = r10_bio->sectors;
-			put_buf(r10_bio);
-			md_done_sync(mddev, s, 1);
-			break;
-		} else {
-			r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
-			put_buf(r10_bio);
-			r10_bio = r10_bio2;
-		}
-	}
+
+	end_sync_request(r10_bio);
 }
 
 /*
@@ -1299,11 +1551,12 @@
 			if (j == vcnt)
 				continue;
 			mddev->resync_mismatches += r10_bio->sectors;
+			if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+				/* Don't fix anything. */
+				continue;
 		}
-		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
-			/* Don't fix anything. */
-			continue;
-		/* Ok, we need to write this bio
+		/* Ok, we need to write this bio, either to correct an
+		 * inconsistency or to correct an unreadable block.
 		 * First we need to fixup bv_offset, bv_len and
 		 * bi_vecs, as the read request might have corrupted these
 		 */
@@ -1355,32 +1608,107 @@
  * The second for writing.
  *
  */
+static void fix_recovery_read_error(r10bio_t *r10_bio)
+{
+	/* We got a read error during recovery.
+	 * We repeat the read in smaller page-sized sections.
+	 * If a read succeeds, write it to the new device or record
+	 * a bad block if we cannot.
+	 * If a read fails, record a bad block on both old and
+	 * new devices.
+	 */
+	mddev_t *mddev = r10_bio->mddev;
+	conf_t *conf = mddev->private;
+	struct bio *bio = r10_bio->devs[0].bio;
+	sector_t sect = 0;
+	int sectors = r10_bio->sectors;
+	int idx = 0;
+	int dr = r10_bio->devs[0].devnum;
+	int dw = r10_bio->devs[1].devnum;
+
+	while (sectors) {
+		int s = sectors;
+		mdk_rdev_t *rdev;
+		sector_t addr;
+		int ok;
+
+		if (s > (PAGE_SIZE>>9))
+			s = PAGE_SIZE >> 9;
+
+		rdev = conf->mirrors[dr].rdev;
+		addr = r10_bio->devs[0].addr + sect;
+		ok = sync_page_io(rdev,
+				  addr,
+				  s << 9,
+				  bio->bi_io_vec[idx].bv_page,
+				  READ, false);
+		if (ok) {
+			rdev = conf->mirrors[dw].rdev;
+			addr = r10_bio->devs[1].addr + sect;
+			ok = sync_page_io(rdev,
+					  addr,
+					  s << 9,
+					  bio->bi_io_vec[idx].bv_page,
+					  WRITE, false);
+			if (!ok)
+				set_bit(WriteErrorSeen, &rdev->flags);
+		}
+		if (!ok) {
+			/* We don't worry if we cannot set a bad block -
+			 * it really is bad so there is no loss in not
+			 * recording it yet
+			 */
+			rdev_set_badblocks(rdev, addr, s, 0);
+
+			if (rdev != conf->mirrors[dw].rdev) {
+				/* need bad block on destination too */
+				mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev;
+				addr = r10_bio->devs[1].addr + sect;
+				ok = rdev_set_badblocks(rdev2, addr, s, 0);
+				if (!ok) {
+					/* just abort the recovery */
+					printk(KERN_NOTICE
+					       "md/raid10:%s: recovery aborted"
+					       " due to read error\n",
+					       mdname(mddev));
+
+					conf->mirrors[dw].recovery_disabled
+						= mddev->recovery_disabled;
+					set_bit(MD_RECOVERY_INTR,
+						&mddev->recovery);
+					break;
+				}
+			}
+		}
+
+		sectors -= s;
+		sect += s;
+		idx++;
+	}
+}
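
fix_recovery_read_error() above walks the failed range one page at a time: PAGE_SIZE >> 9 converts the page size into 512-byte sectors (8 sectors for a 4KiB page), and each pass advances both the sector offset and the page index. The chunking skeleton, with the I/O elided, is just:

	while (sectors) {
		int s = sectors;

		if (s > (PAGE_SIZE >> 9))	/* at most one page per pass */
			s = PAGE_SIZE >> 9;
		/* ... read s sectors at 'sect' into page 'idx', then
		 * rewrite them or record bad blocks, as above ... */
		sectors -= s;
		sect += s;
		idx++;
	}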
 
 static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 {
 	conf_t *conf = mddev->private;
-	int i, d;
-	struct bio *bio, *wbio;
+	int d;
+	struct bio *wbio;
 
+	if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
+		fix_recovery_read_error(r10_bio);
+		end_sync_request(r10_bio);
+		return;
+	}
 
-	/* move the pages across to the second bio
+	/*
+	 * share the pages with the first bio
 	 * and submit the write request
 	 */
-	bio = r10_bio->devs[0].bio;
 	wbio = r10_bio->devs[1].bio;
-	for (i=0; i < wbio->bi_vcnt; i++) {
-		struct page *p = bio->bi_io_vec[i].bv_page;
-		bio->bi_io_vec[i].bv_page = wbio->bi_io_vec[i].bv_page;
-		wbio->bi_io_vec[i].bv_page = p;
-	}
 	d = r10_bio->devs[1].devnum;
 
 	atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 	md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-	if (test_bit(R10BIO_Uptodate, &r10_bio->state))
-		generic_make_request(wbio);
-	else
-		bio_endio(wbio, -EIO);
+	generic_make_request(wbio);
 }
 
 
@@ -1421,6 +1749,26 @@
 		atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
 }
 
+static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+			    int sectors, struct page *page, int rw)
+{
+	sector_t first_bad;
+	int bad_sectors;
+
+	if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
+	    && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+		return -1;
+	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+		/* success */
+		return 1;
+	if (rw == WRITE)
+		set_bit(WriteErrorSeen, &rdev->flags);
+	/* need to record an error - either for the block or the device */
+	if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+		md_error(rdev->mddev, rdev);
+	return 0;
+}
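
r10_sync_page_io() folds three outcomes into one return value: 1 for a successful transfer, 0 for an I/O failure (after which a bad block, or failing that a device failure, has been recorded), and -1 when the range is already known bad and the I/O is refused outright. A caller sketch (variable names illustrative):

	switch (r10_sync_page_io(rdev, sector, nsect, page, READ)) {
	case 1:
		/* data transferred successfully */
		break;
	case 0:
		/* transfer failed; a bad block or device error was recorded */
		break;
	case -1:
		/* range already marked bad, so the I/O was never issued */
		break;
	}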
+
 /*
  * This is a kernel thread which:
  *
@@ -1476,10 +1824,15 @@
 
 		rcu_read_lock();
 		do {
+			sector_t first_bad;
+			int bad_sectors;
+
 			d = r10_bio->devs[sl].devnum;
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags)) {
+			    test_bit(In_sync, &rdev->flags) &&
+			    is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
+					&first_bad, &bad_sectors) == 0) {
 				atomic_inc(&rdev->nr_pending);
 				rcu_read_unlock();
 				success = sync_page_io(rdev,
@@ -1499,9 +1852,19 @@
 		rcu_read_unlock();
 
 		if (!success) {
-			/* Cannot read from anywhere -- bye bye array */
+			/* Cannot read from anywhere, just mark the block
+			 * as bad on the first device to discourage future
+			 * reads.
+			 */
 			int dn = r10_bio->devs[r10_bio->read_slot].devnum;
-			md_error(mddev, conf->mirrors[dn].rdev);
+			rdev = conf->mirrors[dn].rdev;
+
+			if (!rdev_set_badblocks(
+				    rdev,
+				    r10_bio->devs[r10_bio->read_slot].addr
+				    + sect,
+				    s, 0))
+				md_error(mddev, rdev);
 			break;
 		}
 
@@ -1516,80 +1879,82 @@
 			sl--;
 			d = r10_bio->devs[sl].devnum;
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
-			if (rdev &&
-			    test_bit(In_sync, &rdev->flags)) {
-				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
-				atomic_add(s, &rdev->corrected_errors);
-				if (sync_page_io(rdev,
-						 r10_bio->devs[sl].addr +
-						 sect,
-						 s<<9, conf->tmppage, WRITE, false)
-				    == 0) {
-					/* Well, this device is dead */
-					printk(KERN_NOTICE
-					       "md/raid10:%s: read correction "
-					       "write failed"
-					       " (%d sectors at %llu on %s)\n",
-					       mdname(mddev), s,
-					       (unsigned long long)(
-						       sect + rdev->data_offset),
-					       bdevname(rdev->bdev, b));
-					printk(KERN_NOTICE "md/raid10:%s: %s: failing "
-					       "drive\n",
-					       mdname(mddev),
-					       bdevname(rdev->bdev, b));
-					md_error(mddev, rdev);
-				}
-				rdev_dec_pending(rdev, mddev);
-				rcu_read_lock();
+			if (!rdev ||
+			    !test_bit(In_sync, &rdev->flags))
+				continue;
+
+			atomic_inc(&rdev->nr_pending);
+			rcu_read_unlock();
+			if (r10_sync_page_io(rdev,
+					     r10_bio->devs[sl].addr +
+					     sect,
+					     s<<9, conf->tmppage, WRITE)
+			    == 0) {
+				/* Well, this device is dead */
+				printk(KERN_NOTICE
+				       "md/raid10:%s: read correction "
+				       "write failed"
+				       " (%d sectors at %llu on %s)\n",
+				       mdname(mddev), s,
+				       (unsigned long long)(
+					       sect + rdev->data_offset),
+				       bdevname(rdev->bdev, b));
+				printk(KERN_NOTICE "md/raid10:%s: %s: failing "
+				       "drive\n",
+				       mdname(mddev),
+				       bdevname(rdev->bdev, b));
 			}
+			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 		sl = start;
 		while (sl != r10_bio->read_slot) {
+			char b[BDEVNAME_SIZE];
 
 			if (sl==0)
 				sl = conf->copies;
 			sl--;
 			d = r10_bio->devs[sl].devnum;
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
-			if (rdev &&
-			    test_bit(In_sync, &rdev->flags)) {
-				char b[BDEVNAME_SIZE];
-				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
-				if (sync_page_io(rdev,
-						 r10_bio->devs[sl].addr +
-						 sect,
-						 s<<9, conf->tmppage,
-						 READ, false) == 0) {
-					/* Well, this device is dead */
-					printk(KERN_NOTICE
-					       "md/raid10:%s: unable to read back "
-					       "corrected sectors"
-					       " (%d sectors at %llu on %s)\n",
-					       mdname(mddev), s,
-					       (unsigned long long)(
-						       sect + rdev->data_offset),
-					       bdevname(rdev->bdev, b));
-					printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
-					       mdname(mddev),
-					       bdevname(rdev->bdev, b));
+			if (!rdev ||
+			    !test_bit(In_sync, &rdev->flags))
+				continue;
 
-					md_error(mddev, rdev);
-				} else {
-					printk(KERN_INFO
-					       "md/raid10:%s: read error corrected"
-					       " (%d sectors at %llu on %s)\n",
-					       mdname(mddev), s,
-					       (unsigned long long)(
-						       sect + rdev->data_offset),
-					       bdevname(rdev->bdev, b));
-				}
-
-				rdev_dec_pending(rdev, mddev);
-				rcu_read_lock();
+			atomic_inc(&rdev->nr_pending);
+			rcu_read_unlock();
+			switch (r10_sync_page_io(rdev,
+					     r10_bio->devs[sl].addr +
+					     sect,
+					     s<<9, conf->tmppage,
+						 READ)) {
+			case 0:
+				/* Well, this device is dead */
+				printk(KERN_NOTICE
+				       "md/raid10:%s: unable to read back "
+				       "corrected sectors"
+				       " (%d sectors at %llu on %s)\n",
+				       mdname(mddev), s,
+				       (unsigned long long)(
+					       sect + rdev->data_offset),
+				       bdevname(rdev->bdev, b));
+				printk(KERN_NOTICE "md/raid10:%s: %s: failing "
+				       "drive\n",
+				       mdname(mddev),
+				       bdevname(rdev->bdev, b));
+				break;
+			case 1:
+				printk(KERN_INFO
+				       "md/raid10:%s: read error corrected"
+				       " (%d sectors at %llu on %s)\n",
+				       mdname(mddev), s,
+				       (unsigned long long)(
+					       sect + rdev->data_offset),
+				       bdevname(rdev->bdev, b));
+				atomic_add(s, &rdev->corrected_errors);
 			}
+
+			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 		rcu_read_unlock();
 
@@ -1598,21 +1963,254 @@
 	}
 }
 
+static void bi_complete(struct bio *bio, int error)
+{
+	complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+	struct completion event;
+	rw |= REQ_SYNC;
+
+	init_completion(&event);
+	bio->bi_private = &event;
+	bio->bi_end_io = bi_complete;
+	submit_bio(rw, bio);
+	wait_for_completion(&event);
+
+	return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
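
submit_bio_wait() makes submit_bio() synchronous by parking a completion in bi_private and letting bi_complete() fire it; the return value is the bio's BIO_UPTODATE bit, so zero means failure. A minimal usage sketch under those assumptions (handle_write_failure is hypothetical):

	/* 'wbio' is a fully prepared bio: device, sector and pages set */
	if (submit_bio_wait(WRITE, wbio) == 0)
		handle_write_failure(wbio);	/* !BIO_UPTODATE */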
+
+static int narrow_write_error(r10bio_t *r10_bio, int i)
+{
+	struct bio *bio = r10_bio->master_bio;
+	mddev_t *mddev = r10_bio->mddev;
+	conf_t *conf = mddev->private;
+	mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
+	/* bio has the data to be written to slot 'i' where
+	 * we just recently had a write error.
+	 * We repeatedly clone the bio and trim down to one block,
+	 * then try the write.  Where the write fails we record
+	 * a bad block.
+	 * It is conceivable that the bio doesn't exactly align with
+	 * blocks.  We must handle this.
+	 *
+	 * We currently own a reference to the rdev.
+	 */
+
+	int block_sectors;
+	sector_t sector;
+	int sectors;
+	int sect_to_write = r10_bio->sectors;
+	int ok = 1;
+
+	if (rdev->badblocks.shift < 0)
+		return 0;
+
+	block_sectors = 1 << rdev->badblocks.shift;
+	sector = r10_bio->sector;
+	sectors = ((r10_bio->sector + block_sectors)
+		   & ~(sector_t)(block_sectors - 1))
+		- sector;
+
+	while (sect_to_write) {
+		struct bio *wbio;
+		if (sectors > sect_to_write)
+			sectors = sect_to_write;
+		/* Write at 'sector' for 'sectors' */
+		wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+		md_trim_bio(wbio, sector - bio->bi_sector, sectors);
+		wbio->bi_sector = (r10_bio->devs[i].addr+
+				   rdev->data_offset+
+				   (sector - r10_bio->sector));
+		wbio->bi_bdev = rdev->bdev;
+		if (submit_bio_wait(WRITE, wbio) == 0)
+			/* Failure! */
+			ok = rdev_set_badblocks(rdev, sector,
+						sectors, 0)
+				&& ok;
+
+		bio_put(wbio);
+		sect_to_write -= sectors;
+		sector += sectors;
+		sectors = block_sectors;
+	}
+	return ok;
+}
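
The initial 'sectors' computation in narrow_write_error() deliberately shortens the first sub-write so every later one starts on a block_sectors boundary: ((sector + bs) & ~(bs - 1)) - sector is the distance from 'sector' to the next multiple of bs, which must be a power of two. A worked example with illustrative values:

	sector_t bs = 8;		/* badblocks granularity, 1 << shift */
	sector_t sector = 1003;
	sector_t first = ((sector + bs) & ~(sector_t)(bs - 1)) - sector;
	/* first == 5: sub-writes cover 1003..1007, 1008..1015, ... */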
+
+static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio)
+{
+	int slot = r10_bio->read_slot;
+	int mirror = r10_bio->devs[slot].devnum;
+	struct bio *bio;
+	conf_t *conf = mddev->private;
+	mdk_rdev_t *rdev;
+	char b[BDEVNAME_SIZE];
+	unsigned long do_sync;
+	int max_sectors;
+
+	/* We got a read error.  Maybe the drive is bad.  Maybe just
+	 * the block is bad, and we can fix it.
+	 * We freeze all other IO, and try reading the block from
+	 * other devices.  When we find one, we re-write
+	 * and check that this fixes the read error.
+	 * This is all done synchronously while the array is
+	 * frozen.
+	 */
+	if (mddev->ro == 0) {
+		freeze_array(conf);
+		fix_read_error(conf, mddev, r10_bio);
+		unfreeze_array(conf);
+	}
+	rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
+
+	bio = r10_bio->devs[slot].bio;
+	bdevname(bio->bi_bdev, b);
+	r10_bio->devs[slot].bio =
+		mddev->ro ? IO_BLOCKED : NULL;
+read_more:
+	mirror = read_balance(conf, r10_bio, &max_sectors);
+	if (mirror == -1) {
+		printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
+		       " read error for block %llu\n",
+		       mdname(mddev), b,
+		       (unsigned long long)r10_bio->sector);
+		raid_end_bio_io(r10_bio);
+		bio_put(bio);
+		return;
+	}
+
+	do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
+	if (bio)
+		bio_put(bio);
+	slot = r10_bio->read_slot;
+	rdev = conf->mirrors[mirror].rdev;
+	printk_ratelimited(
+		KERN_ERR
+		"md/raid10:%s: %s: redirecting"
+		"sector %llu to another mirror\n",
+		mdname(mddev),
+		bdevname(rdev->bdev, b),
+		(unsigned long long)r10_bio->sector);
+	bio = bio_clone_mddev(r10_bio->master_bio,
+			      GFP_NOIO, mddev);
+	md_trim_bio(bio,
+		    r10_bio->sector - bio->bi_sector,
+		    max_sectors);
+	r10_bio->devs[slot].bio = bio;
+	bio->bi_sector = r10_bio->devs[slot].addr
+		+ rdev->data_offset;
+	bio->bi_bdev = rdev->bdev;
+	bio->bi_rw = READ | do_sync;
+	bio->bi_private = r10_bio;
+	bio->bi_end_io = raid10_end_read_request;
+	if (max_sectors < r10_bio->sectors) {
+		/* Drat - have to split this up more */
+		struct bio *mbio = r10_bio->master_bio;
+		int sectors_handled =
+			r10_bio->sector + max_sectors
+			- mbio->bi_sector;
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (mbio->bi_phys_segments == 0)
+			mbio->bi_phys_segments = 2;
+		else
+			mbio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		generic_make_request(bio);
+		bio = NULL;
+
+		r10_bio = mempool_alloc(conf->r10bio_pool,
+					GFP_NOIO);
+		r10_bio->master_bio = mbio;
+		r10_bio->sectors = (mbio->bi_size >> 9)
+			- sectors_handled;
+		r10_bio->state = 0;
+		set_bit(R10BIO_ReadError,
+			&r10_bio->state);
+		r10_bio->mddev = mddev;
+		r10_bio->sector = mbio->bi_sector
+			+ sectors_handled;
+
+		goto read_more;
+	} else
+		generic_make_request(bio);
+}
+
+static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
+{
+	/* Some sort of write request has finished and it
+	 * succeeded in writing where we thought there was a
+	 * bad block.  So forget the bad block.
+	 * Or possibly it failed, and we need to record
+	 * a bad block.
+	 */
+	int m;
+	mdk_rdev_t *rdev;
+
+	if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
+	    test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+		for (m = 0; m < conf->copies; m++) {
+			int dev = r10_bio->devs[m].devnum;
+			rdev = conf->mirrors[dev].rdev;
+			if (r10_bio->devs[m].bio == NULL)
+				continue;
+			if (test_bit(BIO_UPTODATE,
+				     &r10_bio->devs[m].bio->bi_flags)) {
+				rdev_clear_badblocks(
+					rdev,
+					r10_bio->devs[m].addr,
+					r10_bio->sectors);
+			} else {
+				if (!rdev_set_badblocks(
+					    rdev,
+					    r10_bio->devs[m].addr,
+					    r10_bio->sectors, 0))
+					md_error(conf->mddev, rdev);
+			}
+		}
+		put_buf(r10_bio);
+	} else {
+		for (m = 0; m < conf->copies; m++) {
+			int dev = r10_bio->devs[m].devnum;
+			struct bio *bio = r10_bio->devs[m].bio;
+			rdev = conf->mirrors[dev].rdev;
+			if (bio == IO_MADE_GOOD) {
+				rdev_clear_badblocks(
+					rdev,
+					r10_bio->devs[m].addr,
+					r10_bio->sectors);
+				rdev_dec_pending(rdev, conf->mddev);
+			} else if (bio != NULL &&
+				   !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+				if (!narrow_write_error(r10_bio, m)) {
+					md_error(conf->mddev, rdev);
+					set_bit(R10BIO_Degraded,
+						&r10_bio->state);
+				}
+				rdev_dec_pending(rdev, conf->mddev);
+			}
+		}
+		if (test_bit(R10BIO_WriteError,
+			     &r10_bio->state))
+			close_write(r10_bio);
+		raid_end_bio_io(r10_bio);
+	}
+}
+
 static void raid10d(mddev_t *mddev)
 {
 	r10bio_t *r10_bio;
-	struct bio *bio;
 	unsigned long flags;
 	conf_t *conf = mddev->private;
 	struct list_head *head = &conf->retry_list;
-	mdk_rdev_t *rdev;
 	struct blk_plug plug;
 
 	md_check_recovery(mddev);
 
 	blk_start_plug(&plug);
 	for (;;) {
-		char b[BDEVNAME_SIZE];
 
 		flush_pending_writes(conf);
 
@@ -1628,64 +2226,26 @@
 
 		mddev = r10_bio->mddev;
 		conf = mddev->private;
-		if (test_bit(R10BIO_IsSync, &r10_bio->state))
+		if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+		    test_bit(R10BIO_WriteError, &r10_bio->state))
+			handle_write_completed(conf, r10_bio);
+		else if (test_bit(R10BIO_IsSync, &r10_bio->state))
 			sync_request_write(mddev, r10_bio);
 		else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
 			recovery_request_write(mddev, r10_bio);
+		else if (test_bit(R10BIO_ReadError, &r10_bio->state))
+			handle_read_error(mddev, r10_bio);
 		else {
-			int slot = r10_bio->read_slot;
-			int mirror = r10_bio->devs[slot].devnum;
-			/* we got a read error. Maybe the drive is bad.  Maybe just
-			 * the block and we can fix it.
-			 * We freeze all other IO, and try reading the block from
-			 * other devices.  When we find one, we re-write
-			 * and check it that fixes the read error.
-			 * This is all done synchronously while the array is
-			 * frozen.
+			/* just a partial read to be scheduled from a
+			 * separate context
 			 */
-			if (mddev->ro == 0) {
-				freeze_array(conf);
-				fix_read_error(conf, mddev, r10_bio);
-				unfreeze_array(conf);
-			}
-			rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
-
-			bio = r10_bio->devs[slot].bio;
-			r10_bio->devs[slot].bio =
-				mddev->ro ? IO_BLOCKED : NULL;
-			mirror = read_balance(conf, r10_bio);
-			if (mirror == -1) {
-				printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
-				       " read error for block %llu\n",
-				       mdname(mddev),
-				       bdevname(bio->bi_bdev,b),
-				       (unsigned long long)r10_bio->sector);
-				raid_end_bio_io(r10_bio);
-				bio_put(bio);
-			} else {
-				const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
-				bio_put(bio);
-				slot = r10_bio->read_slot;
-				rdev = conf->mirrors[mirror].rdev;
-				if (printk_ratelimit())
-					printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
-					       " another mirror\n",
-					       mdname(mddev),
-					       bdevname(rdev->bdev,b),
-					       (unsigned long long)r10_bio->sector);
-				bio = bio_clone_mddev(r10_bio->master_bio,
-						      GFP_NOIO, mddev);
-				r10_bio->devs[slot].bio = bio;
-				bio->bi_sector = r10_bio->devs[slot].addr
-					+ rdev->data_offset;
-				bio->bi_bdev = rdev->bdev;
-				bio->bi_rw = READ | do_sync;
-				bio->bi_private = r10_bio;
-				bio->bi_end_io = raid10_end_read_request;
-				generic_make_request(bio);
-			}
+			int slot = r10_bio->read_slot;
+			generic_make_request(r10_bio->devs[slot].bio);
 		}
+
 		cond_resched();
+		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+			md_check_recovery(mddev);
 	}
 	blk_finish_plug(&plug);
 }
@@ -1746,7 +2306,6 @@
 	int i;
 	int max_sync;
 	sector_t sync_blocks;
-
 	sector_t sectors_skipped = 0;
 	int chunks_skipped = 0;
 
@@ -1828,7 +2387,7 @@
 	max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
 	if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		/* recovery... the complicated one */
-		int j, k;
+		int j;
 		r10_bio = NULL;
 
 		for (i=0 ; i<conf->raid_disks; i++) {
@@ -1836,6 +2395,7 @@
 			r10bio_t *rb2;
 			sector_t sect;
 			int must_sync;
+			int any_working;
 
 			if (conf->mirrors[i].rdev == NULL ||
 			    test_bit(In_sync, &conf->mirrors[i].rdev->flags)) 
@@ -1887,19 +2447,42 @@
 			must_sync = bitmap_start_sync(mddev->bitmap, sect,
 						      &sync_blocks, still_degraded);
 
+			any_working = 0;
 			for (j=0; j<conf->copies;j++) {
+				int k;
 				int d = r10_bio->devs[j].devnum;
+				sector_t from_addr, to_addr;
+				mdk_rdev_t *rdev;
+				sector_t sector, first_bad;
+				int bad_sectors;
 				if (!conf->mirrors[d].rdev ||
 				    !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
 					continue;
 				/* This is where we read from */
+				any_working = 1;
+				rdev = conf->mirrors[d].rdev;
+				sector = r10_bio->devs[j].addr;
+
+				if (is_badblock(rdev, sector, max_sync,
+						&first_bad, &bad_sectors)) {
+					if (first_bad > sector)
+						max_sync = first_bad - sector;
+					else {
+						bad_sectors -= (sector
+								- first_bad);
+						if (max_sync > bad_sectors)
+							max_sync = bad_sectors;
+						continue;
+					}
+				}
 				bio = r10_bio->devs[0].bio;
 				bio->bi_next = biolist;
 				biolist = bio;
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = end_sync_read;
 				bio->bi_rw = READ;
-				bio->bi_sector = r10_bio->devs[j].addr +
+				from_addr = r10_bio->devs[j].addr;
+				bio->bi_sector = from_addr +
 					conf->mirrors[d].rdev->data_offset;
 				bio->bi_bdev = conf->mirrors[d].rdev->bdev;
 				atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@ -1916,26 +2499,48 @@
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = end_sync_write;
 				bio->bi_rw = WRITE;
-				bio->bi_sector = r10_bio->devs[k].addr +
+				to_addr = r10_bio->devs[k].addr;
+				bio->bi_sector = to_addr +
 					conf->mirrors[i].rdev->data_offset;
 				bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 
 				r10_bio->devs[0].devnum = d;
+				r10_bio->devs[0].addr = from_addr;
 				r10_bio->devs[1].devnum = i;
+				r10_bio->devs[1].addr = to_addr;
 
 				break;
 			}
 			if (j == conf->copies) {
-				/* Cannot recover, so abort the recovery */
+				/* Cannot recover, so abort the recovery or
+				 * record a bad block */
 				put_buf(r10_bio);
 				if (rb2)
 					atomic_dec(&rb2->remaining);
 				r10_bio = rb2;
-				if (!test_and_set_bit(MD_RECOVERY_INTR,
-						      &mddev->recovery))
-					printk(KERN_INFO "md/raid10:%s: insufficient "
-					       "working devices for recovery.\n",
-					       mdname(mddev));
+				if (any_working) {
+					/* problem is that there are bad blocks
+					 * on other device(s)
+					 */
+					int k;
+					for (k = 0; k < conf->copies; k++)
+						if (r10_bio->devs[k].devnum == i)
+							break;
+					if (!rdev_set_badblocks(
+						    conf->mirrors[i].rdev,
+						    r10_bio->devs[k].addr,
+						    max_sync, 0))
+						any_working = 0;
+				}
+				if (!any_working)  {
+					if (!test_and_set_bit(MD_RECOVERY_INTR,
+							      &mddev->recovery))
+						printk(KERN_INFO "md/raid10:%s: insufficient "
+						       "working devices for recovery.\n",
+						       mdname(mddev));
+					conf->mirrors[i].recovery_disabled
+						= mddev->recovery_disabled;
+				}
 				break;
 			}
 		}
@@ -1979,12 +2584,28 @@
 
 		for (i=0; i<conf->copies; i++) {
 			int d = r10_bio->devs[i].devnum;
+			sector_t first_bad, sector;
+			int bad_sectors;
+
 			bio = r10_bio->devs[i].bio;
 			bio->bi_end_io = NULL;
 			clear_bit(BIO_UPTODATE, &bio->bi_flags);
 			if (conf->mirrors[d].rdev == NULL ||
 			    test_bit(Faulty, &conf->mirrors[d].rdev->flags))
 				continue;
+			sector = r10_bio->devs[i].addr;
+			if (is_badblock(conf->mirrors[d].rdev,
+					sector, max_sync,
+					&first_bad, &bad_sectors)) {
+				if (first_bad > sector)
+					max_sync = first_bad - sector;
+				else {
+					bad_sectors -= (sector - first_bad);
+					if (max_sync > bad_sectors)
+						max_sync = bad_sectors;
+					continue;
+				}
+			}
 			atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 			atomic_inc(&r10_bio->remaining);
 			bio->bi_next = biolist;
@@ -1992,7 +2613,7 @@
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_read;
 			bio->bi_rw = READ;
-			bio->bi_sector = r10_bio->devs[i].addr +
+			bio->bi_sector = sector +
 				conf->mirrors[d].rdev->data_offset;
 			bio->bi_bdev = conf->mirrors[d].rdev->bdev;
 			count++;
@@ -2079,7 +2700,8 @@
 	return sectors_skipped + nr_sectors;
  giveup:
 	/* There is nowhere to write, so all non-sync
-	 * drives must be failed, so try the next chunk...
+	 * drives must be failed or in resync, or all drives
+	 * have a bad block, so try the next chunk...
 	 */
 	if (sector_nr + max_sync < max_sector)
 		max_sector = sector_nr + max_sync;
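
Both is_badblock() checks in sync_request clamp max_sync the same way: when the bad range lies ahead of 'sector', the sync is shortened to stop just before it; when 'sector' is already inside the range, the request is trimmed to the remainder of the bad range and that device is skipped. The shared pattern, as a standalone sketch:

	if (is_badblock(rdev, sector, max_sync, &first_bad, &bad_sectors)) {
		if (first_bad > sector) {
			/* bad range ahead: stop short of it */
			max_sync = first_bad - sector;
		} else {
			/* already inside it: trim to its end, skip device */
			bad_sectors -= (sector - first_bad);
			if (max_sync > bad_sectors)
				max_sync = bad_sectors;
		}
	}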
@@ -2249,6 +2871,7 @@
 				 (conf->raid_disks / conf->near_copies));
 
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
+
 		disk_idx = rdev->raid_disk;
 		if (disk_idx >= conf->raid_disks
 		    || disk_idx < 0)
@@ -2271,7 +2894,7 @@
 		disk->head_position = 0;
 	}
 	/* need to check that every block has at least one working mirror */
-	if (!enough(conf)) {
+	if (!enough(conf, -1)) {
 		printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
 		       mdname(mddev));
 		goto out_free_conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 944b110..79cb52a 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -6,6 +6,11 @@
 struct mirror_info {
 	mdk_rdev_t	*rdev;
 	sector_t	head_position;
+	int		recovery_disabled;	/* matches
+						 * mddev->recovery_disabled
+						 * when we shouldn't try
+						 * recovering this device.
+						 */
 };
 
 typedef struct r10bio_s r10bio_t;
@@ -113,10 +118,26 @@
  * level, we store IO_BLOCKED in the appropriate 'bios' pointer
  */
 #define IO_BLOCKED ((struct bio*)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context.  So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
 
 /* bits for r10bio.state */
 #define	R10BIO_Uptodate	0
 #define	R10BIO_IsSync	1
 #define	R10BIO_IsRecover 2
 #define	R10BIO_Degraded 3
+/* Set ReadError on bios that experience a read error
+ * so that raid10d knows what to do with them.
+ */
+#define	R10BIO_ReadError 4
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag.
+ */
+#define	R10BIO_MadeGood 5
+#define	R10BIO_WriteError 6
 #endif
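
IO_BLOCKED and IO_MADE_GOOD overload a struct bio pointer with the small integers 1 and 2, so BIO_SPECIAL() has to be consulted before the pointer may be dereferenced. A sketch of the intended test order, mirroring handle_write_completed():

	struct bio *bio = r10_bio->devs[m].bio;

	if (bio == IO_MADE_GOOD) {
		/* a write into a known-bad range succeeded */
	} else if (bio != NULL && !BIO_SPECIAL(bio)) {
		/* a real bio: bi_flags etc. may be inspected */
	}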
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b72edf3..dbae459 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -51,6 +51,7 @@
 #include <linux/seq_file.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include "md.h"
 #include "raid5.h"
 #include "raid0.h"
@@ -96,8 +97,6 @@
 #define __inline__
 #endif
 
-#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
-
 /*
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
@@ -341,7 +340,7 @@
 			       (unsigned long long)sh->sector, i, dev->toread,
 			       dev->read, dev->towrite, dev->written,
 			       test_bit(R5_LOCKED, &dev->flags));
-			BUG();
+			WARN_ON(1);
 		}
 		dev->flags = 0;
 		raid5_build_block(sh, i, previous);
@@ -527,6 +526,36 @@
 			atomic_inc(&rdev->nr_pending);
 		rcu_read_unlock();
 
+		/* We have already checked bad blocks for reads.  Now
+		 * we need to check for writes.
+		 */
+		while ((rw & WRITE) && rdev &&
+		       test_bit(WriteErrorSeen, &rdev->flags)) {
+			sector_t first_bad;
+			int bad_sectors;
+			int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+					      &first_bad, &bad_sectors);
+			if (!bad)
+				break;
+
+			if (bad < 0) {
+				set_bit(BlockedBadBlocks, &rdev->flags);
+				if (!conf->mddev->external &&
+				    conf->mddev->flags) {
+					/* It is very unlikely, but we might
+					 * still need to write out the
+					 * bad block log - better give it
+					 * a chance */
+					md_check_recovery(conf->mddev);
+				}
+				md_wait_for_blocked_rdev(rdev, conf->mddev);
+			} else {
+				/* Acknowledged bad block - skip the write */
+				rdev_dec_pending(rdev, conf->mddev);
+				rdev = NULL;
+			}
+		}
+
 		if (rdev) {
 			if (s->syncing || s->expanding || s->expanded)
 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
@@ -548,10 +577,6 @@
 			bi->bi_io_vec[0].bv_offset = 0;
 			bi->bi_size = STRIPE_SIZE;
 			bi->bi_next = NULL;
-			if ((rw & WRITE) &&
-			    test_bit(R5_ReWrite, &sh->dev[i].flags))
-				atomic_add(STRIPE_SECTORS,
-					&rdev->corrected_errors);
 			generic_make_request(bi);
 		} else {
 			if (rw & WRITE)
@@ -1020,12 +1045,12 @@
 		if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
 			struct bio *wbi;
 
-			spin_lock(&sh->lock);
+			spin_lock_irq(&sh->raid_conf->device_lock);
 			chosen = dev->towrite;
 			dev->towrite = NULL;
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
-			spin_unlock(&sh->lock);
+			spin_unlock_irq(&sh->raid_conf->device_lock);
 
 			while (wbi && wbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1315,12 +1340,11 @@
 static int grow_one_stripe(raid5_conf_t *conf)
 {
 	struct stripe_head *sh;
-	sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
+	sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
 	if (!sh)
 		return 0;
-	memset(sh, 0, sizeof(*sh) + (conf->pool_size-1)*sizeof(struct r5dev));
+
 	sh->raid_conf = conf;
-	spin_lock_init(&sh->lock);
 	#ifdef CONFIG_MULTICORE_RAID456
 	init_waitqueue_head(&sh->ops.wait_for_ops);
 	#endif
@@ -1435,14 +1459,11 @@
 		return -ENOMEM;
 
 	for (i = conf->max_nr_stripes; i; i--) {
-		nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+		nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
 		if (!nsh)
 			break;
 
-		memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
-
 		nsh->raid_conf = conf;
-		spin_lock_init(&nsh->lock);
 		#ifdef CONFIG_MULTICORE_RAID456
 		init_waitqueue_head(&nsh->ops.wait_for_ops);
 		#endif
@@ -1587,12 +1608,15 @@
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			rdev = conf->disks[i].rdev;
-			printk_rl(KERN_INFO "md/raid:%s: read error corrected"
-				  " (%lu sectors at %llu on %s)\n",
-				  mdname(conf->mddev), STRIPE_SECTORS,
-				  (unsigned long long)(sh->sector
-						       + rdev->data_offset),
-				  bdevname(rdev->bdev, b));
+			printk_ratelimited(
+				KERN_INFO
+				"md/raid:%s: read error corrected"
+				" (%lu sectors at %llu on %s)\n",
+				mdname(conf->mddev), STRIPE_SECTORS,
+				(unsigned long long)(sh->sector
+						     + rdev->data_offset),
+				bdevname(rdev->bdev, b));
+			atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
 		}
@@ -1606,22 +1630,24 @@
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 		atomic_inc(&rdev->read_errors);
 		if (conf->mddev->degraded >= conf->max_degraded)
-			printk_rl(KERN_WARNING
-				  "md/raid:%s: read error not correctable "
-				  "(sector %llu on %s).\n",
-				  mdname(conf->mddev),
-				  (unsigned long long)(sh->sector
-						       + rdev->data_offset),
-				  bdn);
+			printk_ratelimited(
+				KERN_WARNING
+				"md/raid:%s: read error not correctable "
+				"(sector %llu on %s).\n",
+				mdname(conf->mddev),
+				(unsigned long long)(sh->sector
+						     + rdev->data_offset),
+				bdn);
 		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
-			printk_rl(KERN_WARNING
-				  "md/raid:%s: read error NOT corrected!! "
-				  "(sector %llu on %s).\n",
-				  mdname(conf->mddev),
-				  (unsigned long long)(sh->sector
-						       + rdev->data_offset),
-				  bdn);
+			printk_ratelimited(
+				KERN_WARNING
+				"md/raid:%s: read error NOT corrected!! "
+				"(sector %llu on %s).\n",
+				mdname(conf->mddev),
+				(unsigned long long)(sh->sector
+						     + rdev->data_offset),
+				bdn);
 		else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
 			printk(KERN_WARNING
@@ -1649,6 +1675,8 @@
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+	sector_t first_bad;
+	int bad_sectors;
 
 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
@@ -1662,8 +1690,12 @@
 		return;
 	}
 
-	if (!uptodate)
-		md_error(conf->mddev, conf->disks[i].rdev);
+	if (!uptodate) {
+		set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
+		set_bit(R5_WriteError, &sh->dev[i].flags);
+	} else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
+			       &first_bad, &bad_sectors))
+		set_bit(R5_MadeGood, &sh->dev[i].flags);
 
 	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
 	
@@ -1710,6 +1742,7 @@
 		 */
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	}
+	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	printk(KERN_ALERT
@@ -1760,7 +1793,7 @@
 	/*
 	 * Select the parity disk based on the user selected algorithm.
 	 */
-	pd_idx = qd_idx = ~0;
+	pd_idx = qd_idx = -1;
 	switch(conf->level) {
 	case 4:
 		pd_idx = data_disks;
@@ -2143,12 +2176,11 @@
 	raid5_conf_t *conf = sh->raid_conf;
 	int firstwrite=0;
 
-	pr_debug("adding bh b#%llu to stripe s#%llu\n",
+	pr_debug("adding bi b#%llu to stripe s#%llu\n",
 		(unsigned long long)bi->bi_sector,
 		(unsigned long long)sh->sector);
 
 
-	spin_lock(&sh->lock);
 	spin_lock_irq(&conf->device_lock);
 	if (forwrite) {
 		bip = &sh->dev[dd_idx].towrite;
@@ -2169,19 +2201,6 @@
 		bi->bi_next = *bip;
 	*bip = bi;
 	bi->bi_phys_segments++;
-	spin_unlock_irq(&conf->device_lock);
-	spin_unlock(&sh->lock);
-
-	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
-		(unsigned long long)bi->bi_sector,
-		(unsigned long long)sh->sector, dd_idx);
-
-	if (conf->mddev->bitmap && firstwrite) {
-		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
-				  STRIPE_SECTORS, 0);
-		sh->bm_seq = conf->seq_flush+1;
-		set_bit(STRIPE_BIT_DELAY, &sh->state);
-	}
 
 	if (forwrite) {
 		/* check if page is covered */
@@ -2196,12 +2215,23 @@
 		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
 	}
+	spin_unlock_irq(&conf->device_lock);
+
+	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
+		(unsigned long long)(*bip)->bi_sector,
+		(unsigned long long)sh->sector, dd_idx);
+
+	if (conf->mddev->bitmap && firstwrite) {
+		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
+				  STRIPE_SECTORS, 0);
+		sh->bm_seq = conf->seq_flush+1;
+		set_bit(STRIPE_BIT_DELAY, &sh->state);
+	}
 	return 1;
 
  overlap:
 	set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
 	spin_unlock_irq(&conf->device_lock);
-	spin_unlock(&sh->lock);
 	return 0;
 }
 
@@ -2238,9 +2268,18 @@
 			rcu_read_lock();
 			rdev = rcu_dereference(conf->disks[i].rdev);
 			if (rdev && test_bit(In_sync, &rdev->flags))
-				/* multiple read failures in one stripe */
-				md_error(conf->mddev, rdev);
+				atomic_inc(&rdev->nr_pending);
+			else
+				rdev = NULL;
 			rcu_read_unlock();
+			if (rdev) {
+				if (!rdev_set_badblocks(
+					    rdev,
+					    sh->sector,
+					    STRIPE_SECTORS, 0))
+					md_error(conf->mddev, rdev);
+				rdev_dec_pending(rdev, conf->mddev);
+			}
 		}
 		spin_lock_irq(&conf->device_lock);
 		/* fail all writes first */
@@ -2308,6 +2347,10 @@
 		if (bitmap_end)
 			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
 					STRIPE_SECTORS, 0, 0);
+		/* If we were in the middle of a write the parity block might
+		 * still be locked - so just clear all R5_LOCKED flags
+		 */
+		clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	}
 
 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -2315,17 +2358,53 @@
 			md_wakeup_thread(conf->mddev->thread);
 }
 
-/* fetch_block5 - checks the given member device to see if its data needs
+static void
+handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
+		   struct stripe_head_state *s)
+{
+	int abort = 0;
+	int i;
+
+	md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
+	clear_bit(STRIPE_SYNCING, &sh->state);
+	s->syncing = 0;
+	/* There is nothing more to do for sync/check/repair.
+	 * For recover we need to record a bad block on all
+	 * non-sync devices, or abort the recovery
+	 */
+	if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
+		return;
+	/* During recovery devices cannot be removed, so locking and
+	 * refcounting of rdevs is not needed
+	 */
+	for (i = 0; i < conf->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		if (!rdev
+		    || test_bit(Faulty, &rdev->flags)
+		    || test_bit(In_sync, &rdev->flags))
+			continue;
+		if (!rdev_set_badblocks(rdev, sh->sector,
+					STRIPE_SECTORS, 0))
+			abort = 1;
+	}
+	if (abort) {
+		conf->recovery_disabled = conf->mddev->recovery_disabled;
+		set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+	}
+}
+
+/* fetch_block - checks the given member device to see if its data needs
  * to be read or computed to satisfy a request.
  *
  * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill5 to continue
+ * 0 to tell the loop in handle_stripe_fill to continue
  */
-static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
-			int disk_idx, int disks)
+static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
+		       int disk_idx, int disks)
 {
 	struct r5dev *dev = &sh->dev[disk_idx];
-	struct r5dev *failed_dev = &sh->dev[s->failed_num];
+	struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
+				  &sh->dev[s->failed_num[1]] };
 
 	/* is the data in this block needed, and can we get it? */
 	if (!test_bit(R5_LOCKED, &dev->flags) &&
@@ -2333,91 +2412,19 @@
 	    (dev->toread ||
 	     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
 	     s->syncing || s->expanding ||
-	     (s->failed &&
-	      (failed_dev->toread ||
-	       (failed_dev->towrite &&
-		!test_bit(R5_OVERWRITE, &failed_dev->flags)))))) {
-		/* We would like to get this block, possibly by computing it,
-		 * otherwise read it if the backing disk is insync
-		 */
-		if ((s->uptodate == disks - 1) &&
-		    (s->failed && disk_idx == s->failed_num)) {
-			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
-			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
-			set_bit(R5_Wantcompute, &dev->flags);
-			sh->ops.target = disk_idx;
-			sh->ops.target2 = -1;
-			s->req_compute = 1;
-			/* Careful: from this point on 'uptodate' is in the eye
-			 * of raid_run_ops which services 'compute' operations
-			 * before writes. R5_Wantcompute flags a block that will
-			 * be R5_UPTODATE by the time it is needed for a
-			 * subsequent operation.
-			 */
-			s->uptodate++;
-			return 1; /* uptodate + compute == disks */
-		} else if (test_bit(R5_Insync, &dev->flags)) {
-			set_bit(R5_LOCKED, &dev->flags);
-			set_bit(R5_Wantread, &dev->flags);
-			s->locked++;
-			pr_debug("Reading block %d (sync=%d)\n", disk_idx,
-				s->syncing);
-		}
-	}
-
-	return 0;
-}
-
-/**
- * handle_stripe_fill5 - read or compute data to satisfy pending requests.
- */
-static void handle_stripe_fill5(struct stripe_head *sh,
-			struct stripe_head_state *s, int disks)
-{
-	int i;
-
-	/* look for blocks to read/compute, skip this if a compute
-	 * is already in flight, or if the stripe contents are in the
-	 * midst of changing due to a write
-	 */
-	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
-	    !sh->reconstruct_state)
-		for (i = disks; i--; )
-			if (fetch_block5(sh, s, i, disks))
-				break;
-	set_bit(STRIPE_HANDLE, &sh->state);
-}
-
-/* fetch_block6 - checks the given member device to see if its data needs
- * to be read or computed to satisfy a request.
- *
- * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill6 to continue
- */
-static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
-			 struct r6_state *r6s, int disk_idx, int disks)
-{
-	struct r5dev *dev = &sh->dev[disk_idx];
-	struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
-				  &sh->dev[r6s->failed_num[1]] };
-
-	if (!test_bit(R5_LOCKED, &dev->flags) &&
-	    !test_bit(R5_UPTODATE, &dev->flags) &&
-	    (dev->toread ||
-	     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-	     s->syncing || s->expanding ||
-	     (s->failed >= 1 &&
-	      (fdev[0]->toread || s->to_write)) ||
-	     (s->failed >= 2 &&
-	      (fdev[1]->toread || s->to_write)))) {
+	     (s->failed >= 1 && fdev[0]->toread) ||
+	     (s->failed >= 2 && fdev[1]->toread) ||
+	     (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
+	     (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
 		/* we would like to get this block, possibly by computing it,
 		 * otherwise read it if the backing disk is insync
 		 */
 		BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
 		BUG_ON(test_bit(R5_Wantread, &dev->flags));
 		if ((s->uptodate == disks - 1) &&
-		    (s->failed && (disk_idx == r6s->failed_num[0] ||
-				   disk_idx == r6s->failed_num[1]))) {
+		    (s->failed && (disk_idx == s->failed_num[0] ||
+				   disk_idx == s->failed_num[1]))) {
 			/* have disk failed, and we're requested to fetch it;
 			 * do compute it
 			 */
@@ -2429,6 +2436,12 @@
 			sh->ops.target = disk_idx;
 			sh->ops.target2 = -1; /* no 2nd target */
 			s->req_compute = 1;
+			/* Careful: from this point on 'uptodate' is in the eye
+			 * of raid_run_ops which services 'compute' operations
+			 * before writes. R5_Wantcompute flags a block that will
+			 * be R5_UPTODATE by the time it is needed for a
+			 * subsequent operation.
+			 */
 			s->uptodate++;
 			return 1;
 		} else if (s->uptodate == disks-2 && s->failed >= 2) {
@@ -2469,11 +2482,11 @@
 }
 
 /**
- * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ * handle_stripe_fill - read or compute data to satisfy pending requests.
  */
-static void handle_stripe_fill6(struct stripe_head *sh,
-			struct stripe_head_state *s, struct r6_state *r6s,
-			int disks)
+static void handle_stripe_fill(struct stripe_head *sh,
+			       struct stripe_head_state *s,
+			       int disks)
 {
 	int i;
 
@@ -2484,7 +2497,7 @@
 	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
 	    !sh->reconstruct_state)
 		for (i = disks; i--; )
-			if (fetch_block6(sh, s, r6s, i, disks))
+			if (fetch_block(sh, s, i, disks))
 				break;
 	set_bit(STRIPE_HANDLE, &sh->state);
 }
@@ -2540,11 +2553,19 @@
 			md_wakeup_thread(conf->mddev->thread);
 }
 
-static void handle_stripe_dirtying5(raid5_conf_t *conf,
-		struct stripe_head *sh,	struct stripe_head_state *s, int disks)
+static void handle_stripe_dirtying(raid5_conf_t *conf,
+				   struct stripe_head *sh,
+				   struct stripe_head_state *s,
+				   int disks)
 {
 	int rmw = 0, rcw = 0, i;
-	for (i = disks; i--; ) {
+	if (conf->max_degraded == 2) {
+		/* RAID6 requires 'rcw' in the current implementation.
+		 * Calculate the real rcw later - for now make it
+		 * look like rcw is cheaper
+		 */
+		rcw = 1; rmw = 2;
+	} else for (i = disks; i--; ) {
 		/* would I have to read this buffer for read_modify_write */
 		struct r5dev *dev = &sh->dev[i];
 		if ((dev->towrite || i == sh->pd_idx) &&
@@ -2591,16 +2612,19 @@
 				}
 			}
 		}
-	if (rcw <= rmw && rcw > 0)
+	if (rcw <= rmw && rcw > 0) {
 		/* want reconstruct write, but need to get some data */
+		rcw = 0;
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (!test_bit(R5_OVERWRITE, &dev->flags) &&
-			    i != sh->pd_idx &&
+			    i != sh->pd_idx && i != sh->qd_idx &&
 			    !test_bit(R5_LOCKED, &dev->flags) &&
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
-			    test_bit(R5_Wantcompute, &dev->flags)) &&
-			    test_bit(R5_Insync, &dev->flags)) {
+			      test_bit(R5_Wantcompute, &dev->flags))) {
+				rcw++;
+				if (!test_bit(R5_Insync, &dev->flags))
+					continue; /* it's a failed drive */
 				if (
 				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 					pr_debug("Read_old block "
@@ -2614,6 +2638,7 @@
 				}
 			}
 		}
+	}
 	/* now if nothing is locked, and if we have enough data,
 	 * we can start a write request
 	 */
@@ -2630,53 +2655,6 @@
 		schedule_reconstruction(sh, s, rcw == 0, 0);
 }
 
-static void handle_stripe_dirtying6(raid5_conf_t *conf,
-		struct stripe_head *sh,	struct stripe_head_state *s,
-		struct r6_state *r6s, int disks)
-{
-	int rcw = 0, pd_idx = sh->pd_idx, i;
-	int qd_idx = sh->qd_idx;
-
-	set_bit(STRIPE_HANDLE, &sh->state);
-	for (i = disks; i--; ) {
-		struct r5dev *dev = &sh->dev[i];
-		/* check if we haven't enough data */
-		if (!test_bit(R5_OVERWRITE, &dev->flags) &&
-		    i != pd_idx && i != qd_idx &&
-		    !test_bit(R5_LOCKED, &dev->flags) &&
-		    !(test_bit(R5_UPTODATE, &dev->flags) ||
-		      test_bit(R5_Wantcompute, &dev->flags))) {
-			rcw++;
-			if (!test_bit(R5_Insync, &dev->flags))
-				continue; /* it's a failed drive */
-
-			if (
-			  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-				pr_debug("Read_old stripe %llu "
-					"block %d for Reconstruct\n",
-				     (unsigned long long)sh->sector, i);
-				set_bit(R5_LOCKED, &dev->flags);
-				set_bit(R5_Wantread, &dev->flags);
-				s->locked++;
-			} else {
-				pr_debug("Request delayed stripe %llu "
-					"block %d for Reconstruct\n",
-				     (unsigned long long)sh->sector, i);
-				set_bit(STRIPE_DELAYED, &sh->state);
-				set_bit(STRIPE_HANDLE, &sh->state);
-			}
-		}
-	}
-	/* now if nothing is locked, and if we have enough data, we can start a
-	 * write request
-	 */
-	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
-	    s->locked == 0 && rcw == 0 &&
-	    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-		schedule_reconstruction(sh, s, 1, 0);
-	}
-}
-
 static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 				struct stripe_head_state *s, int disks)
 {
@@ -2695,7 +2673,7 @@
 			s->uptodate--;
 			break;
 		}
-		dev = &sh->dev[s->failed_num];
+		dev = &sh->dev[s->failed_num[0]];
 		/* fall through */
 	case check_state_compute_result:
 		sh->check_state = check_state_idle;
@@ -2767,7 +2745,7 @@
 
 static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 				  struct stripe_head_state *s,
-				  struct r6_state *r6s, int disks)
+				  int disks)
 {
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
@@ -2786,14 +2764,14 @@
 	switch (sh->check_state) {
 	case check_state_idle:
 		/* start a new check operation if there are < 2 failures */
-		if (s->failed == r6s->q_failed) {
+		if (s->failed == s->q_failed) {
 			/* The only possible failed device holds Q, so it
 			 * makes sense to check P (If anything else were failed,
 			 * we would have used P to recreate it).
 			 */
 			sh->check_state = check_state_run;
 		}
-		if (!r6s->q_failed && s->failed < 2) {
+		if (!s->q_failed && s->failed < 2) {
 			/* Q is not failed, and we didn't use it to generate
 			 * anything, so it makes sense to check it
 			 */
@@ -2835,13 +2813,13 @@
 		 */
 		BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
 		if (s->failed == 2) {
-			dev = &sh->dev[r6s->failed_num[1]];
+			dev = &sh->dev[s->failed_num[1]];
 			s->locked++;
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantwrite, &dev->flags);
 		}
 		if (s->failed >= 1) {
-			dev = &sh->dev[r6s->failed_num[0]];
+			dev = &sh->dev[s->failed_num[0]];
 			s->locked++;
 			set_bit(R5_LOCKED, &dev->flags);
 			set_bit(R5_Wantwrite, &dev->flags);
@@ -2928,8 +2906,7 @@
 	}
 }
 
-static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
-				struct r6_state *r6s)
+static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh)
 {
 	int i;
 
@@ -2971,7 +2948,7 @@
 			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
 			for (j = 0; j < conf->raid_disks; j++)
 				if (j != sh2->pd_idx &&
-				    (!r6s || j != sh2->qd_idx) &&
+				    j != sh2->qd_idx &&
 				    !test_bit(R5_Expanded, &sh2->dev[j].flags))
 					break;
 			if (j == conf->raid_disks) {
@@ -3006,43 +2983,35 @@
  *
  */
 
-static void handle_stripe5(struct stripe_head *sh)
+static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 {
 	raid5_conf_t *conf = sh->raid_conf;
-	int disks = sh->disks, i;
-	struct bio *return_bi = NULL;
-	struct stripe_head_state s;
+	int disks = sh->disks;
 	struct r5dev *dev;
-	mdk_rdev_t *blocked_rdev = NULL;
-	int prexor;
-	int dec_preread_active = 0;
+	int i;
 
-	memset(&s, 0, sizeof(s));
-	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
-		 "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state,
-		 atomic_read(&sh->count), sh->pd_idx, sh->check_state,
-		 sh->reconstruct_state);
+	memset(s, 0, sizeof(*s));
 
-	spin_lock(&sh->lock);
-	clear_bit(STRIPE_HANDLE, &sh->state);
-	clear_bit(STRIPE_DELAYED, &sh->state);
-
-	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
-	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+	s->syncing = test_bit(STRIPE_SYNCING, &sh->state);
+	s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+	s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+	s->failed_num[0] = -1;
+	s->failed_num[1] = -1;
 
 	/* Now to look around and see what can be done */
 	rcu_read_lock();
+	spin_lock_irq(&conf->device_lock);
 	for (i=disks; i--; ) {
 		mdk_rdev_t *rdev;
+		sector_t first_bad;
+		int bad_sectors;
+		int is_bad = 0;
 
 		dev = &sh->dev[i];
 
-		pr_debug("check %d: state 0x%lx toread %p read %p write %p "
-			"written %p\n",	i, dev->flags, dev->toread, dev->read,
-			dev->towrite, dev->written);
-
-		/* maybe we can request a biofill operation
+		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
+			i, dev->flags, dev->toread, dev->towrite, dev->written);
+		/* maybe we can reply to a read
 		 *
 		 * new wantfill requests are only permitted while
 		 * ops_complete_biofill is guaranteed to be inactive
@@ -3052,37 +3021,74 @@
 			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
-		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
-		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
-		if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
+		if (test_bit(R5_LOCKED, &dev->flags))
+			s->locked++;
+		if (test_bit(R5_UPTODATE, &dev->flags))
+			s->uptodate++;
+		if (test_bit(R5_Wantcompute, &dev->flags)) {
+			s->compute++;
+			BUG_ON(s->compute > 2);
+		}
 
 		if (test_bit(R5_Wantfill, &dev->flags))
-			s.to_fill++;
+			s->to_fill++;
 		else if (dev->toread)
-			s.to_read++;
+			s->to_read++;
 		if (dev->towrite) {
-			s.to_write++;
+			s->to_write++;
 			if (!test_bit(R5_OVERWRITE, &dev->flags))
-				s.non_overwrite++;
+				s->non_overwrite++;
 		}
 		if (dev->written)
-			s.written++;
+			s->written++;
 		rdev = rcu_dereference(conf->disks[i].rdev);
-		if (blocked_rdev == NULL &&
-		    rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			blocked_rdev = rdev;
-			atomic_inc(&rdev->nr_pending);
+		if (rdev) {
+			is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+					     &first_bad, &bad_sectors);
+			if (s->blocked_rdev == NULL
+			    && (test_bit(Blocked, &rdev->flags)
+				|| is_bad < 0)) {
+				if (is_bad < 0)
+					set_bit(BlockedBadBlocks,
+						&rdev->flags);
+				s->blocked_rdev = rdev;
+				atomic_inc(&rdev->nr_pending);
+			}
 		}
 		clear_bit(R5_Insync, &dev->flags);
 		if (!rdev)
 			/* Not in-sync */;
-		else if (test_bit(In_sync, &rdev->flags))
+		else if (is_bad) {
+			/* also not in-sync */
+			if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+				/* treat as in-sync, but with a read error
+				 * which we can now try to correct
+				 */
+				set_bit(R5_Insync, &dev->flags);
+				set_bit(R5_ReadError, &dev->flags);
+			}
+		} else if (test_bit(In_sync, &rdev->flags))
 			set_bit(R5_Insync, &dev->flags);
 		else {
-			/* could be in-sync depending on recovery/reshape status */
+			/* in sync if before recovery_offset */
 			if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
 				set_bit(R5_Insync, &dev->flags);
 		}
+		if (test_bit(R5_WriteError, &dev->flags)) {
+			clear_bit(R5_Insync, &dev->flags);
+			if (!test_bit(Faulty, &rdev->flags)) {
+				s->handle_bad_blocks = 1;
+				atomic_inc(&rdev->nr_pending);
+			} else
+				clear_bit(R5_WriteError, &dev->flags);
+		}
+		if (test_bit(R5_MadeGood, &dev->flags)) {
+			if (!test_bit(Faulty, &rdev->flags)) {
+				s->handle_bad_blocks = 1;
+				atomic_inc(&rdev->nr_pending);
+			} else
+				clear_bit(R5_MadeGood, &dev->flags);
+		}
 		if (!test_bit(R5_Insync, &dev->flags)) {
 			/* The ReadError flag will just be confusing now */
 			clear_bit(R5_ReadError, &dev->flags);
@@ -3091,21 +3097,60 @@
 		if (test_bit(R5_ReadError, &dev->flags))
 			clear_bit(R5_Insync, &dev->flags);
 		if (!test_bit(R5_Insync, &dev->flags)) {
-			s.failed++;
-			s.failed_num = i;
+			if (s->failed < 2)
+				s->failed_num[s->failed] = i;
+			s->failed++;
 		}
 	}
+	spin_unlock_irq(&conf->device_lock);
 	rcu_read_unlock();
+}
 
-	if (unlikely(blocked_rdev)) {
+static void handle_stripe(struct stripe_head *sh)
+{
+	struct stripe_head_state s;
+	raid5_conf_t *conf = sh->raid_conf;
+	int i;
+	int prexor;
+	int disks = sh->disks;
+	struct r5dev *pdev, *qdev;
+
+	clear_bit(STRIPE_HANDLE, &sh->state);
+	if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
+		/* already being handled, ensure it gets handled
+		 * again when current action finishes */
+		set_bit(STRIPE_HANDLE, &sh->state);
+		return;
+	}
+
+	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+		set_bit(STRIPE_SYNCING, &sh->state);
+		clear_bit(STRIPE_INSYNC, &sh->state);
+	}
+	clear_bit(STRIPE_DELAYED, &sh->state);
+
+	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
+		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
+	       (unsigned long long)sh->sector, sh->state,
+	       atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
+	       sh->check_state, sh->reconstruct_state);
+
+	analyse_stripe(sh, &s);
+
+	if (s.handle_bad_blocks) {
+		set_bit(STRIPE_HANDLE, &sh->state);
+		goto finish;
+	}
+
+	if (unlikely(s.blocked_rdev)) {
 		if (s.syncing || s.expanding || s.expanded ||
 		    s.to_write || s.written) {
 			set_bit(STRIPE_HANDLE, &sh->state);
-			goto unlock;
+			goto finish;
 		}
 		/* There is nothing for the blocked_rdev to block */
-		rdev_dec_pending(blocked_rdev, conf->mddev);
-		blocked_rdev = NULL;
+		rdev_dec_pending(s.blocked_rdev, conf->mddev);
+		s.blocked_rdev = NULL;
 	}
 
 	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -3114,38 +3159,46 @@
 	}
 
 	pr_debug("locked=%d uptodate=%d to_read=%d"
-		" to_write=%d failed=%d failed_num=%d\n",
-		s.locked, s.uptodate, s.to_read, s.to_write,
-		s.failed, s.failed_num);
-	/* check if the array has lost two devices and, if so, some requests might
-	 * need to be failed
+	       " to_write=%d failed=%d failed_num=%d,%d\n",
+	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
+	       s.failed_num[0], s.failed_num[1]);
+	/* check if the array has lost more than max_degraded devices and,
+	 * if so, some requests might need to be failed.
 	 */
-	if (s.failed > 1 && s.to_read+s.to_write+s.written)
-		handle_failed_stripe(conf, sh, &s, disks, &return_bi);
-	if (s.failed > 1 && s.syncing) {
-		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
-		clear_bit(STRIPE_SYNCING, &sh->state);
-		s.syncing = 0;
-	}
+	if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
+		handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
+	if (s.failed > conf->max_degraded && s.syncing)
+		handle_failed_sync(conf, sh, &s);
 
-	/* might be able to return some write requests if the parity block
-	 * is safe, or on a failed drive
+	/*
+	 * might be able to return some write requests if the parity blocks
+	 * are safe, or on a failed drive
 	 */
-	dev = &sh->dev[sh->pd_idx];
-	if ( s.written &&
-	     ((test_bit(R5_Insync, &dev->flags) &&
-	       !test_bit(R5_LOCKED, &dev->flags) &&
-	       test_bit(R5_UPTODATE, &dev->flags)) ||
-	       (s.failed == 1 && s.failed_num == sh->pd_idx)))
-		handle_stripe_clean_event(conf, sh, disks, &return_bi);
+	pdev = &sh->dev[sh->pd_idx];
+	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+	qdev = &sh->dev[sh->qd_idx];
+	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+		|| conf->level < 6;
+
+	if (s.written &&
+	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+			     && !test_bit(R5_LOCKED, &pdev->flags)
+			     && test_bit(R5_UPTODATE, &pdev->flags)))) &&
+	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+			     && !test_bit(R5_LOCKED, &qdev->flags)
+			     && test_bit(R5_UPTODATE, &qdev->flags)))))
+		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
 
 	/* Now we might consider reading some blocks, either to check/generate
 	 * parity, or to satisfy requests
 	 * or to load a block that is being partially written.
 	 */
-	if (s.to_read || s.non_overwrite ||
-	    (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
-		handle_stripe_fill5(sh, &s, disks);
+	if (s.to_read || s.non_overwrite
+	    || (conf->level == 6 && s.to_write && s.failed)
+	    || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
+		handle_stripe_fill(sh, &s, disks);
 
 	/* Now we check to see if any write operations have recently
 	 * completed
@@ -3161,21 +3214,25 @@
 		 * be written back to disk
 		 */
 		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		BUG_ON(sh->qd_idx >= 0 &&
+		       !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags));
 		for (i = disks; i--; ) {
-			dev = &sh->dev[i];
+			struct r5dev *dev = &sh->dev[i];
 			if (test_bit(R5_LOCKED, &dev->flags) &&
-				(i == sh->pd_idx || dev->written)) {
+				(i == sh->pd_idx || i == sh->qd_idx ||
+				 dev->written)) {
 				pr_debug("Writing block %d\n", i);
 				set_bit(R5_Wantwrite, &dev->flags);
 				if (prexor)
 					continue;
 				if (!test_bit(R5_Insync, &dev->flags) ||
-				    (i == sh->pd_idx && s.failed == 0))
+				    ((i == sh->pd_idx || i == sh->qd_idx) &&
+				     s.failed == 0))
 					set_bit(STRIPE_INSYNC, &sh->state);
 			}
 		}
 		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-			dec_preread_active = 1;
+			s.dec_preread_active = 1;
 	}
 
 	/* Now to consider new write requests and what else, if anything
@@ -3185,7 +3242,7 @@
 	 *    block.
 	 */
 	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
-		handle_stripe_dirtying5(conf, sh, &s, disks);
+		handle_stripe_dirtying(conf, sh, &s, disks);
 
 	/* maybe we need to check and possibly fix the parity for this stripe
 	 * Any reads will already have been scheduled, so we just see if enough
@@ -3195,54 +3252,61 @@
 	if (sh->check_state ||
 	    (s.syncing && s.locked == 0 &&
 	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
-	     !test_bit(STRIPE_INSYNC, &sh->state)))
-		handle_parity_checks5(conf, sh, &s, disks);
+	     !test_bit(STRIPE_INSYNC, &sh->state))) {
+		if (conf->level == 6)
+			handle_parity_checks6(conf, sh, &s, disks);
+		else
+			handle_parity_checks5(conf, sh, &s, disks);
+	}
 
 	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
-		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
 	}
 
-	/* If the failed drive is just a ReadError, then we might need to progress
-	 * the repair/check process
+	/* If the failed drives only have a ReadError, then we might need
+	 * to progress the repair/check process
 	 */
-	if (s.failed == 1 && !conf->mddev->ro &&
-	    test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
-	    && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
-	    && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
-		) {
-		dev = &sh->dev[s.failed_num];
-		if (!test_bit(R5_ReWrite, &dev->flags)) {
-			set_bit(R5_Wantwrite, &dev->flags);
-			set_bit(R5_ReWrite, &dev->flags);
-			set_bit(R5_LOCKED, &dev->flags);
-			s.locked++;
-		} else {
-			/* let's read it back */
-			set_bit(R5_Wantread, &dev->flags);
-			set_bit(R5_LOCKED, &dev->flags);
-			s.locked++;
+	if (s.failed <= conf->max_degraded && !conf->mddev->ro)
+		for (i = 0; i < s.failed; i++) {
+			struct r5dev *dev = &sh->dev[s.failed_num[i]];
+			if (test_bit(R5_ReadError, &dev->flags)
+			    && !test_bit(R5_LOCKED, &dev->flags)
+			    && test_bit(R5_UPTODATE, &dev->flags)
+				) {
+				if (!test_bit(R5_ReWrite, &dev->flags)) {
+					set_bit(R5_Wantwrite, &dev->flags);
+					set_bit(R5_ReWrite, &dev->flags);
+					set_bit(R5_LOCKED, &dev->flags);
+					s.locked++;
+				} else {
+					/* let's read it back */
+					set_bit(R5_Wantread, &dev->flags);
+					set_bit(R5_LOCKED, &dev->flags);
+					s.locked++;
+				}
+			}
 		}
-	}
 
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
-		struct stripe_head *sh2
+		struct stripe_head *sh_src
 			= get_active_stripe(conf, sh->sector, 1, 1, 1);
-		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
-			/* sh cannot be written until sh2 has been read.
+		if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
+			/* sh cannot be written until sh_src has been read.
 			 * so arrange for sh to be delayed a little
 			 */
 			set_bit(STRIPE_DELAYED, &sh->state);
 			set_bit(STRIPE_HANDLE, &sh->state);
 			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
-					      &sh2->state))
+					      &sh_src->state))
 				atomic_inc(&conf->preread_active_stripes);
-			release_stripe(sh2);
-			goto unlock;
+			release_stripe(sh_src);
+			goto finish;
 		}
-		if (sh2)
-			release_stripe(sh2);
+		if (sh_src)
+			release_stripe(sh_src);
 
 		sh->reconstruct_state = reconstruct_state_idle;
 		clear_bit(STRIPE_EXPANDING, &sh->state);
@@ -3268,323 +3332,39 @@
 
 	if (s.expanding && s.locked == 0 &&
 	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
-		handle_stripe_expansion(conf, sh, NULL);
+		handle_stripe_expansion(conf, sh);
 
- unlock:
-	spin_unlock(&sh->lock);
-
+finish:
 	/* wait for this device to become unblocked */
-	if (unlikely(blocked_rdev))
-		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+	if (unlikely(s.blocked_rdev))
+		md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
 
-	if (s.ops_request)
-		raid_run_ops(sh, s.ops_request);
-
-	ops_run_io(sh, &s);
-
-	if (dec_preread_active) {
-		/* We delay this until after ops_run_io so that if make_request
-		 * is waiting on a flush, it won't continue until the writes
-		 * have actually been submitted.
-		 */
-		atomic_dec(&conf->preread_active_stripes);
-		if (atomic_read(&conf->preread_active_stripes) <
-		    IO_THRESHOLD)
-			md_wakeup_thread(conf->mddev->thread);
-	}
-	return_io(return_bi);
-}
-
-static void handle_stripe6(struct stripe_head *sh)
-{
-	raid5_conf_t *conf = sh->raid_conf;
-	int disks = sh->disks;
-	struct bio *return_bi = NULL;
-	int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
-	struct stripe_head_state s;
-	struct r6_state r6s;
-	struct r5dev *dev, *pdev, *qdev;
-	mdk_rdev_t *blocked_rdev = NULL;
-	int dec_preread_active = 0;
-
-	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
-		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
-	       (unsigned long long)sh->sector, sh->state,
-	       atomic_read(&sh->count), pd_idx, qd_idx,
-	       sh->check_state, sh->reconstruct_state);
-	memset(&s, 0, sizeof(s));
-
-	spin_lock(&sh->lock);
-	clear_bit(STRIPE_HANDLE, &sh->state);
-	clear_bit(STRIPE_DELAYED, &sh->state);
-
-	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
-	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
-	/* Now to look around and see what can be done */
-
-	rcu_read_lock();
-	for (i=disks; i--; ) {
-		mdk_rdev_t *rdev;
-		dev = &sh->dev[i];
-
-		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
-			i, dev->flags, dev->toread, dev->towrite, dev->written);
-		/* maybe we can reply to a read
-		 *
-		 * new wantfill requests are only permitted while
-		 * ops_complete_biofill is guaranteed to be inactive
-		 */
-		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
-		    !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
-			set_bit(R5_Wantfill, &dev->flags);
-
-		/* now count some things */
-		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
-		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
-		if (test_bit(R5_Wantcompute, &dev->flags)) {
-			s.compute++;
-			BUG_ON(s.compute > 2);
-		}
-
-		if (test_bit(R5_Wantfill, &dev->flags)) {
-			s.to_fill++;
-		} else if (dev->toread)
-			s.to_read++;
-		if (dev->towrite) {
-			s.to_write++;
-			if (!test_bit(R5_OVERWRITE, &dev->flags))
-				s.non_overwrite++;
-		}
-		if (dev->written)
-			s.written++;
-		rdev = rcu_dereference(conf->disks[i].rdev);
-		if (blocked_rdev == NULL &&
-		    rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			blocked_rdev = rdev;
-			atomic_inc(&rdev->nr_pending);
-		}
-		clear_bit(R5_Insync, &dev->flags);
-		if (!rdev)
-			/* Not in-sync */;
-		else if (test_bit(In_sync, &rdev->flags))
-			set_bit(R5_Insync, &dev->flags);
-		else {
-			/* in sync if before recovery_offset */
-			if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
-				set_bit(R5_Insync, &dev->flags);
-		}
-		if (!test_bit(R5_Insync, &dev->flags)) {
-			/* The ReadError flag will just be confusing now */
-			clear_bit(R5_ReadError, &dev->flags);
-			clear_bit(R5_ReWrite, &dev->flags);
-		}
-		if (test_bit(R5_ReadError, &dev->flags))
-			clear_bit(R5_Insync, &dev->flags);
-		if (!test_bit(R5_Insync, &dev->flags)) {
-			if (s.failed < 2)
-				r6s.failed_num[s.failed] = i;
-			s.failed++;
-		}
-	}
-	rcu_read_unlock();
-
-	if (unlikely(blocked_rdev)) {
-		if (s.syncing || s.expanding || s.expanded ||
-		    s.to_write || s.written) {
-			set_bit(STRIPE_HANDLE, &sh->state);
-			goto unlock;
-		}
-		/* There is nothing for the blocked_rdev to block */
-		rdev_dec_pending(blocked_rdev, conf->mddev);
-		blocked_rdev = NULL;
-	}
-
-	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
-		set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
-		set_bit(STRIPE_BIOFILL_RUN, &sh->state);
-	}
-
-	pr_debug("locked=%d uptodate=%d to_read=%d"
-	       " to_write=%d failed=%d failed_num=%d,%d\n",
-	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
-	       r6s.failed_num[0], r6s.failed_num[1]);
-	/* check if the array has lost >2 devices and, if so, some requests
-	 * might need to be failed
-	 */
-	if (s.failed > 2 && s.to_read+s.to_write+s.written)
-		handle_failed_stripe(conf, sh, &s, disks, &return_bi);
-	if (s.failed > 2 && s.syncing) {
-		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
-		clear_bit(STRIPE_SYNCING, &sh->state);
-		s.syncing = 0;
-	}
-
-	/*
-	 * might be able to return some write requests if the parity blocks
-	 * are safe, or on a failed drive
-	 */
-	pdev = &sh->dev[pd_idx];
-	r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
-		|| (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
-	qdev = &sh->dev[qd_idx];
-	r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
-		|| (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
-
-	if ( s.written &&
-	     ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
-			     && !test_bit(R5_LOCKED, &pdev->flags)
-			     && test_bit(R5_UPTODATE, &pdev->flags)))) &&
-	     ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
-			     && !test_bit(R5_LOCKED, &qdev->flags)
-			     && test_bit(R5_UPTODATE, &qdev->flags)))))
-		handle_stripe_clean_event(conf, sh, disks, &return_bi);
-
-	/* Now we might consider reading some blocks, either to check/generate
-	 * parity, or to satisfy requests
-	 * or to load a block that is being partially written.
-	 */
-	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-	    (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
-		handle_stripe_fill6(sh, &s, &r6s, disks);
-
-	/* Now we check to see if any write operations have recently
-	 * completed
-	 */
-	if (sh->reconstruct_state == reconstruct_state_drain_result) {
-
-		sh->reconstruct_state = reconstruct_state_idle;
-		/* All the 'written' buffers and the parity blocks are ready to
-		 * be written back to disk
-		 */
-		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
-		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+	if (s.handle_bad_blocks)
 		for (i = disks; i--; ) {
-			dev = &sh->dev[i];
-			if (test_bit(R5_LOCKED, &dev->flags) &&
-			    (i == sh->pd_idx || i == qd_idx ||
-			     dev->written)) {
-				pr_debug("Writing block %d\n", i);
-				BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
-				set_bit(R5_Wantwrite, &dev->flags);
-				if (!test_bit(R5_Insync, &dev->flags) ||
-				    ((i == sh->pd_idx || i == qd_idx) &&
-				      s.failed == 0))
-					set_bit(STRIPE_INSYNC, &sh->state);
+			mdk_rdev_t *rdev;
+			struct r5dev *dev = &sh->dev[i];
+			if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
+				/* We own a safe reference to the rdev */
+				rdev = conf->disks[i].rdev;
+				if (!rdev_set_badblocks(rdev, sh->sector,
+							STRIPE_SECTORS, 0))
+					md_error(conf->mddev, rdev);
+				rdev_dec_pending(rdev, conf->mddev);
+			}
+			if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
+				rdev = conf->disks[i].rdev;
+				rdev_clear_badblocks(rdev, sh->sector,
+						     STRIPE_SECTORS);
+				rdev_dec_pending(rdev, conf->mddev);
 			}
 		}
-		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-			dec_preread_active = 1;
-	}
-
-	/* Now to consider new write requests and what else, if anything
-	 * should be read.  We do not handle new writes when:
-	 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
-	 * 2/ A 'check' operation is in flight, as it may clobber the parity
-	 *    block.
-	 */
-	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
-		handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
-
-	/* maybe we need to check and possibly fix the parity for this stripe
-	 * Any reads will already have been scheduled, so we just see if enough
-	 * data is available.  The parity check is held off while parity
-	 * dependent operations are in flight.
-	 */
-	if (sh->check_state ||
-	    (s.syncing && s.locked == 0 &&
-	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
-	     !test_bit(STRIPE_INSYNC, &sh->state)))
-		handle_parity_checks6(conf, sh, &s, &r6s, disks);
-
-	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
-		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
-		clear_bit(STRIPE_SYNCING, &sh->state);
-	}
-
-	/* If the failed drives are just a ReadError, then we might need
-	 * to progress the repair/check process
-	 */
-	if (s.failed <= 2 && !conf->mddev->ro)
-		for (i = 0; i < s.failed; i++) {
-			dev = &sh->dev[r6s.failed_num[i]];
-			if (test_bit(R5_ReadError, &dev->flags)
-			    && !test_bit(R5_LOCKED, &dev->flags)
-			    && test_bit(R5_UPTODATE, &dev->flags)
-				) {
-				if (!test_bit(R5_ReWrite, &dev->flags)) {
-					set_bit(R5_Wantwrite, &dev->flags);
-					set_bit(R5_ReWrite, &dev->flags);
-					set_bit(R5_LOCKED, &dev->flags);
-					s.locked++;
-				} else {
-					/* let's read it back */
-					set_bit(R5_Wantread, &dev->flags);
-					set_bit(R5_LOCKED, &dev->flags);
-					s.locked++;
-				}
-			}
-		}
-
-	/* Finish reconstruct operations initiated by the expansion process */
-	if (sh->reconstruct_state == reconstruct_state_result) {
-		sh->reconstruct_state = reconstruct_state_idle;
-		clear_bit(STRIPE_EXPANDING, &sh->state);
-		for (i = conf->raid_disks; i--; ) {
-			set_bit(R5_Wantwrite, &sh->dev[i].flags);
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			s.locked++;
-		}
-	}
-
-	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
-	    !sh->reconstruct_state) {
-		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1, 1);
-		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
-			/* sh cannot be written until sh2 has been read.
-			 * so arrange for sh to be delayed a little
-			 */
-			set_bit(STRIPE_DELAYED, &sh->state);
-			set_bit(STRIPE_HANDLE, &sh->state);
-			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
-					      &sh2->state))
-				atomic_inc(&conf->preread_active_stripes);
-			release_stripe(sh2);
-			goto unlock;
-		}
-		if (sh2)
-			release_stripe(sh2);
-
-		/* Need to write out all blocks after computing P&Q */
-		sh->disks = conf->raid_disks;
-		stripe_set_idx(sh->sector, conf, 0, sh);
-		schedule_reconstruction(sh, &s, 1, 1);
-	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
-		clear_bit(STRIPE_EXPAND_READY, &sh->state);
-		atomic_dec(&conf->reshape_stripes);
-		wake_up(&conf->wait_for_overlap);
-		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
-	}
-
-	if (s.expanding && s.locked == 0 &&
-	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
-		handle_stripe_expansion(conf, sh, &r6s);
-
- unlock:
-	spin_unlock(&sh->lock);
-
-	/* wait for this device to become unblocked */
-	if (unlikely(blocked_rdev))
-		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
 	if (s.ops_request)
 		raid_run_ops(sh, s.ops_request);
 
 	ops_run_io(sh, &s);
 
-
-	if (dec_preread_active) {
+	if (s.dec_preread_active) {
 		/* We delay this until after ops_run_io so that if make_request
 		 * is waiting on a flush, it won't continue until the writes
 		 * have actually been submitted.
@@ -3595,15 +3375,9 @@
 			md_wakeup_thread(conf->mddev->thread);
 	}
 
-	return_io(return_bi);
-}
+	return_io(s.return_bi);
 
-static void handle_stripe(struct stripe_head *sh)
-{
-	if (sh->raid_conf->level == 6)
-		handle_stripe6(sh);
-	else
-		handle_stripe5(sh);
+	clear_bit(STRIPE_ACTIVE, &sh->state);
 }
 
 static void raid5_activate_delayed(raid5_conf_t *conf)
@@ -3833,6 +3607,9 @@
 	rcu_read_lock();
 	rdev = rcu_dereference(conf->disks[dd_idx].rdev);
 	if (rdev && test_bit(In_sync, &rdev->flags)) {
+		sector_t first_bad;
+		int bad_sectors;
+
 		atomic_inc(&rdev->nr_pending);
 		rcu_read_unlock();
 		raid_bio->bi_next = (void*)rdev;
@@ -3840,8 +3617,10 @@
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
 		align_bi->bi_sector += rdev->data_offset;
 
-		if (!bio_fits_rdev(align_bi)) {
-			/* too big in some way */
+		if (!bio_fits_rdev(align_bi) ||
+		    is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+				&first_bad, &bad_sectors)) {
+			/* too big in some way, or has a known bad block */
 			bio_put(align_bi);
 			rdev_dec_pending(rdev, mddev);
 			return 0;
@@ -4016,7 +3795,7 @@
 				}
 			}
 
-			if (bio_data_dir(bi) == WRITE &&
+			if (rw == WRITE &&
 			    logical_sector >= mddev->suspend_lo &&
 			    logical_sector < mddev->suspend_hi) {
 				release_stripe(sh);
@@ -4034,7 +3813,7 @@
 			}
 
 			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-			    !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
+			    !add_stripe_bio(sh, bi, dd_idx, rw)) {
 				/* Stripe is busy expanding or
 				 * add failed due to overlap.  Flush everything
 				 * and wait a while
@@ -4375,10 +4154,7 @@
 
 	bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
 
-	spin_lock(&sh->lock);
-	set_bit(STRIPE_SYNCING, &sh->state);
-	clear_bit(STRIPE_INSYNC, &sh->state);
-	spin_unlock(&sh->lock);
+	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
 
 	handle_stripe(sh);
 	release_stripe(sh);
@@ -4509,6 +4285,9 @@
 		release_stripe(sh);
 		cond_resched();
 
+		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+			md_check_recovery(mddev);
+
 		spin_lock_irq(&conf->device_lock);
 	}
 	pr_debug("%d stripes handled\n", handled);
@@ -5313,6 +5092,7 @@
 		 * isn't possible.
 		 */
 		if (!test_bit(Faulty, &rdev->flags) &&
+		    mddev->recovery_disabled != conf->recovery_disabled &&
 		    !has_failed(conf) &&
 		    number < conf->raid_disks) {
 			err = -EBUSY;
@@ -5341,6 +5121,9 @@
 	int first = 0;
 	int last = conf->raid_disks - 1;
 
+	if (mddev->recovery_disabled == conf->recovery_disabled)
+		return -EBUSY;
+
 	if (has_failed(conf))
 		/* no point adding a device */
 		return -EINVAL;
@@ -5519,16 +5302,14 @@
 			if (rdev->raid_disk < 0 &&
 			    !test_bit(Faulty, &rdev->flags)) {
 				if (raid5_add_disk(mddev, rdev) == 0) {
-					char nm[20];
 					if (rdev->raid_disk
 					    >= conf->previous_raid_disks) {
 						set_bit(In_sync, &rdev->flags);
 						added_devices++;
 					} else
 						rdev->recovery_offset = 0;
-					sprintf(nm, "rd%d", rdev->raid_disk);
-					if (sysfs_create_link(&mddev->kobj,
-							      &rdev->kobj, nm))
+
+					if (sysfs_link_rdev(mddev, rdev))
 						/* Failure here is OK */;
 				}
 			} else if (rdev->raid_disk >= conf->previous_raid_disks
@@ -5624,9 +5405,7 @@
 			     d++) {
 				mdk_rdev_t *rdev = conf->disks[d].rdev;
 				if (rdev && raid5_remove_disk(mddev, d) == 0) {
-					char nm[20];
-					sprintf(nm, "rd%d", rdev->raid_disk);
-					sysfs_remove_link(&mddev->kobj, nm);
+					sysfs_unlink_rdev(mddev, rdev);
 					rdev->raid_disk = -1;
 				}
 			}
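
Note on the raid5.c rework above: handle_stripe5() and handle_stripe6() are
folded into a single handle_stripe(), and the per-stripe spinlock is replaced
by a STRIPE_ACTIVE state bit that serializes the handler itself.  A minimal
sketch of the resulting entry and exit path, assuming a prologue the hunks
above do not show (the first hunk of this section is exactly the race-loser
branch of this entry):

	static void handle_stripe(struct stripe_head *sh)
	{
		struct stripe_head_state s;

		clear_bit(STRIPE_HANDLE, &sh->state);
		if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
			/* already being handled, ensure it gets handled
			 * again when current action finishes */
			set_bit(STRIPE_HANDLE, &sh->state);
			return;
		}
		/* ... the body shown in the hunks above: analyse_stripe()
		 * zeroes and fills s, then the state machine runs ... */
		clear_bit(STRIPE_ACTIVE, &sh->state);
	}

The atomic test_and_set_bit() admits exactly one caller; a loser of the race
re-arms STRIPE_HANDLE so the stripe is revisited once the current pass ends.
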
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 3ca77a2..11b9566 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -6,11 +6,11 @@
 
 /*
  *
- * Each stripe contains one buffer per disc.  Each buffer can be in
+ * Each stripe contains one buffer per device.  Each buffer can be in
  * one of a number of states stored in "flags".  Changes between
- * these states happen *almost* exclusively under a per-stripe
- * spinlock.  Some very specific changes can happen in bi_end_io, and
- * these are not protected by the spin lock.
+ * these states happen *almost* exclusively under the protection of the
+ * STRIPE_ACTIVE flag.  Some very specific changes can happen in bi_end_io, and
+ * these are not protected by STRIPE_ACTIVE.
  *
  * The flag bits that are used to represent these states are:
  *   R5_UPTODATE and R5_LOCKED
@@ -76,12 +76,10 @@
  * block and the cached buffer are successfully written, any buffer on
  * a written list can be returned with b_end_io.
  *
- * The write list and read list both act as fifos.  The read list is
- * protected by the device_lock.  The write and written lists are
- * protected by the stripe lock.  The device_lock, which can be
- * claimed while the stipe lock is held, is only for list
- * manipulations and will only be held for a very short time.  It can
- * be claimed from interrupts.
+ * The write list and read list both act as fifos.  The read list,
+ * write list and written list are protected by the device_lock.
+ * The device_lock is only for list manipulations and will only be
+ * held for a very short time.  It can be claimed from interrupts.
  *
  *
  * Stripes in the stripe cache can be on one of two lists (or on
@@ -96,7 +94,6 @@
  *
  * The inactive_list, handle_list and hash bucket lists are all protected by the
  * device_lock.
- *  - stripes on the inactive_list never have their stripe_lock held.
  *  - stripes have a reference counter. If count==0, they are on a list.
  *  - If a stripe might need handling, STRIPE_HANDLE is set.
  *  - When refcount reaches zero, then if STRIPE_HANDLE it is put on
@@ -116,10 +113,10 @@
  *  attach a request to an active stripe (add_stripe_bh())
  *     lockdev attach-buffer unlockdev
  *  handle a stripe (handle_stripe())
- *     lockstripe clrSTRIPE_HANDLE ...
+ *     setSTRIPE_ACTIVE,  clrSTRIPE_HANDLE ...
  *		(lockdev check-buffers unlockdev) ..
  *		change-state ..
- *		record io/ops needed unlockstripe schedule io/ops
+ *		record io/ops needed clearSTRIPE_ACTIVE schedule io/ops
  *  release an active stripe (release_stripe())
  *     lockdev if (!--cnt) { if  STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
  *
@@ -128,8 +125,7 @@
  * on a cached buffer, and plus one if the stripe is undergoing stripe
  * operations.
  *
- * Stripe operations are performed outside the stripe lock,
- * the stripe operations are:
+ * The stripe operations are:
  * -copying data between the stripe cache and user application buffers
  * -computing blocks to save a disk access, or to recover a missing block
  * -updating the parity on a write operation (reconstruct write and
@@ -159,7 +155,8 @@
  */
 
 /*
- * Operations state - intermediate states that are visible outside of sh->lock
+ * Operations state - intermediate states that are visible outside of
+ *   the STRIPE_ACTIVE critical section.
  * In general _idle indicates nothing is running, _run indicates a data
  * processing operation is active, and _result means the data processing result
  * is stable and can be acted upon.  For simple operations like biofill and
@@ -209,7 +206,6 @@
 	short			ddf_layout;/* use DDF ordering to calculate Q */
 	unsigned long		state;		/* state flags */
 	atomic_t		count;	      /* nr of active thread/requests */
-	spinlock_t		lock;
 	int			bm_seq;	/* sequence number for bitmap flushes */
 	int			disks;		/* disks in stripe */
 	enum check_states	check_state;
@@ -240,19 +236,20 @@
 };
 
 /* stripe_head_state - collects and tracks the dynamic state of a stripe_head
- *     for handle_stripe.  It is only valid under spin_lock(sh->lock);
+ *     for handle_stripe.
  */
 struct stripe_head_state {
 	int syncing, expanding, expanded;
 	int locked, uptodate, to_read, to_write, failed, written;
 	int to_fill, compute, req_compute, non_overwrite;
-	int failed_num;
+	int failed_num[2];
+	int p_failed, q_failed;
+	int dec_preread_active;
 	unsigned long ops_request;
-};
 
-/* r6_state - extra state data only relevant to r6 */
-struct r6_state {
-	int p_failed, q_failed, failed_num[2];
+	struct bio *return_bi;
+	mdk_rdev_t *blocked_rdev;
+	int handle_bad_blocks;
 };
 
 /* Flags */
@@ -268,14 +265,16 @@
 #define	R5_ReWrite	9	/* have tried to over-write the readerror */
 
 #define	R5_Expanded	10	/* This block now has post-expand data */
-#define	R5_Wantcompute	11 /* compute_block in progress treat as
-				    * uptodate
-				    */
-#define	R5_Wantfill	12 /* dev->toread contains a bio that needs
-				    * filling
-				    */
-#define R5_Wantdrain	13 /* dev->towrite needs to be drained */
-#define R5_WantFUA	14	/* Write should be FUA */
+#define	R5_Wantcompute	11	/* compute_block in progress treat as
+				 * uptodate
+				 */
+#define	R5_Wantfill	12	/* dev->toread contains a bio that needs
+				 * filling
+				 */
+#define	R5_Wantdrain	13	/* dev->towrite needs to be drained */
+#define	R5_WantFUA	14	/* Write should be FUA */
+#define	R5_WriteError	15	/* got a write error - need to record it */
+#define	R5_MadeGood	16	/* A bad block has been fixed by writing to it*/
 /*
  * Write method
  */
@@ -289,21 +288,25 @@
 /*
  * Stripe state
  */
-#define STRIPE_HANDLE		2
-#define	STRIPE_SYNCING		3
-#define	STRIPE_INSYNC		4
-#define	STRIPE_PREREAD_ACTIVE	5
-#define	STRIPE_DELAYED		6
-#define	STRIPE_DEGRADED		7
-#define	STRIPE_BIT_DELAY	8
-#define	STRIPE_EXPANDING	9
-#define	STRIPE_EXPAND_SOURCE	10
-#define	STRIPE_EXPAND_READY	11
-#define	STRIPE_IO_STARTED	12 /* do not count towards 'bypass_count' */
-#define	STRIPE_FULL_WRITE	13 /* all blocks are set to be overwritten */
-#define	STRIPE_BIOFILL_RUN	14
-#define	STRIPE_COMPUTE_RUN	15
-#define	STRIPE_OPS_REQ_PENDING	16
+enum {
+	STRIPE_ACTIVE,
+	STRIPE_HANDLE,
+	STRIPE_SYNC_REQUESTED,
+	STRIPE_SYNCING,
+	STRIPE_INSYNC,
+	STRIPE_PREREAD_ACTIVE,
+	STRIPE_DELAYED,
+	STRIPE_DEGRADED,
+	STRIPE_BIT_DELAY,
+	STRIPE_EXPANDING,
+	STRIPE_EXPAND_SOURCE,
+	STRIPE_EXPAND_READY,
+	STRIPE_IO_STARTED,	/* do not count towards 'bypass_count' */
+	STRIPE_FULL_WRITE,	/* all blocks are set to be overwritten */
+	STRIPE_BIOFILL_RUN,
+	STRIPE_COMPUTE_RUN,
+	STRIPE_OPS_REQ_PENDING,
+};
 
 /*
  * Operation request flags
@@ -336,7 +339,7 @@
  * PREREAD_ACTIVE.
  * In stripe_handle, if we find pre-reading is necessary, we do it if
  * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
- * HANDLE gets cleared if stripe_handle leave nothing locked.
+ * HANDLE gets cleared if stripe_handle leaves nothing locked.
  */
 
 
@@ -399,7 +402,7 @@
 					    * (fresh device added).
 					    * Cleared when a sync completes.
 					    */
-
+	int			recovery_disabled;
 	/* per cpu variables */
 	struct raid5_percpu {
 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index b7622c3..e1eca2a 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -282,6 +282,7 @@
 obj-$(CONFIG_USB_USBNET)        += usb/
 obj-$(CONFIG_USB_ZD1201)        += usb/
 obj-$(CONFIG_USB_IPHETH)        += usb/
+obj-$(CONFIG_USB_CDC_PHONET)   += usb/
 
 obj-$(CONFIG_WLAN) += wireless/
 obj-$(CONFIG_NET_TULIP) += tulip/
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 536038b..31798f5 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -1502,13 +1502,13 @@
 	 * firmware to wipe the ring without re-initializing it.
 	 */
 	if (!test_and_set_bit(0, &ap->std_refill_busy))
-		ace_load_std_rx_ring(ap, RX_RING_SIZE);
+		ace_load_std_rx_ring(dev, RX_RING_SIZE);
 	else
 		printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n",
 		       ap->name);
 	if (ap->version >= 2) {
 		if (!test_and_set_bit(0, &ap->mini_refill_busy))
-			ace_load_mini_rx_ring(ap, RX_MINI_SIZE);
+			ace_load_mini_rx_ring(dev, RX_MINI_SIZE);
 		else
 			printk(KERN_ERR "%s: Someone is busy refilling "
 			       "the RX mini ring\n", ap->name);
@@ -1584,9 +1584,10 @@
 }
 
 
-static void ace_tasklet(unsigned long dev)
+static void ace_tasklet(unsigned long arg)
 {
-	struct ace_private *ap = netdev_priv((struct net_device *)dev);
+	struct net_device *dev = (struct net_device *) arg;
+	struct ace_private *ap = netdev_priv(dev);
 	int cur_size;
 
 	cur_size = atomic_read(&ap->cur_rx_bufs);
@@ -1595,7 +1596,7 @@
 #ifdef DEBUG
 		printk("refilling buffers (current %i)\n", cur_size);
 #endif
-		ace_load_std_rx_ring(ap, RX_RING_SIZE - cur_size);
+		ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size);
 	}
 
 	if (ap->version >= 2) {
@@ -1606,7 +1607,7 @@
 			printk("refilling mini buffers (current %i)\n",
 			       cur_size);
 #endif
-			ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size);
+			ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size);
 		}
 	}
 
@@ -1616,7 +1617,7 @@
 #ifdef DEBUG
 		printk("refilling jumbo buffers (current %i)\n", cur_size);
 #endif
-		ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size);
+		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size);
 	}
 	ap->tasklet_pending = 0;
 }
@@ -1642,8 +1643,9 @@
  * done only before the device is enabled, thus no interrupts are
  * generated and by the interrupt handler/tasklet handler.
  */
-static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs)
 {
+	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
 	short i, idx;
 
@@ -1657,11 +1659,10 @@
 		struct rx_desc *rd;
 		dma_addr_t mapping;
 
-		skb = dev_alloc_skb(ACE_STD_BUFSIZE + NET_IP_ALIGN);
+		skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE);
 		if (!skb)
 			break;
 
-		skb_reserve(skb, NET_IP_ALIGN);
 		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
 				       offset_in_page(skb->data),
 				       ACE_STD_BUFSIZE,
@@ -1705,8 +1706,9 @@
 }
 
 
-static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs)
 {
+	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
 	short i, idx;
 
@@ -1718,11 +1720,10 @@
 		struct rx_desc *rd;
 		dma_addr_t mapping;
 
-		skb = dev_alloc_skb(ACE_MINI_BUFSIZE + NET_IP_ALIGN);
+		skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE);
 		if (!skb)
 			break;
 
-		skb_reserve(skb, NET_IP_ALIGN);
 		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
 				       offset_in_page(skb->data),
 				       ACE_MINI_BUFSIZE,
@@ -1762,8 +1763,9 @@
  * Load the jumbo rx ring, this may happen at any time if the MTU
  * is changed to a value > 1500.
  */
-static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs)
 {
+	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
 	short i, idx;
 
@@ -1774,11 +1776,10 @@
 		struct rx_desc *rd;
 		dma_addr_t mapping;
 
-		skb = dev_alloc_skb(ACE_JUMBO_BUFSIZE + NET_IP_ALIGN);
+		skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE);
 		if (!skb)
 			break;
 
-		skb_reserve(skb, NET_IP_ALIGN);
 		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
 				       offset_in_page(skb->data),
 				       ACE_JUMBO_BUFSIZE,
@@ -2196,7 +2197,7 @@
 #ifdef DEBUG
 				printk("low on std buffers %i\n", cur_size);
 #endif
-				ace_load_std_rx_ring(ap,
+				ace_load_std_rx_ring(dev,
 						     RX_RING_SIZE - cur_size);
 			} else
 				run_tasklet = 1;
@@ -2212,7 +2213,8 @@
 					printk("low on mini buffers %i\n",
 					       cur_size);
 #endif
-					ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size);
+					ace_load_mini_rx_ring(dev,
+							      RX_MINI_SIZE - cur_size);
 				} else
 					run_tasklet = 1;
 			}
@@ -2228,7 +2230,8 @@
 					printk("low on jumbo buffers %i\n",
 					       cur_size);
 #endif
-					ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size);
+					ace_load_jumbo_rx_ring(dev,
+							       RX_JUMBO_SIZE - cur_size);
 				} else
 					run_tasklet = 1;
 			}
@@ -2267,7 +2270,7 @@
 
 	if (ap->jumbo &&
 	    !test_and_set_bit(0, &ap->jumbo_refill_busy))
-		ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE);
+		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
 
 	if (dev->flags & IFF_PROMISC) {
 		cmd.evt = C_SET_PROMISC_MODE;
@@ -2575,7 +2578,7 @@
 			       "support\n", dev->name);
 			ap->jumbo = 1;
 			if (!test_and_set_bit(0, &ap->jumbo_refill_busy))
-				ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE);
+				ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
 			ace_set_rxtx_parms(dev, 1);
 		}
 	} else {
diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h
index f67dc9b..51c486c 100644
--- a/drivers/net/acenic.h
+++ b/drivers/net/acenic.h
@@ -766,9 +766,9 @@
  * Prototypes
  */
 static int ace_init(struct net_device *dev);
-static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs);
-static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs);
-static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs);
+static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs);
+static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs);
+static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs);
 static irqreturn_t ace_interrupt(int irq, void *dev_id);
 static int ace_load_firmware(struct net_device *dev);
 static int ace_open(struct net_device *dev);
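
The acenic conversion above hands the net_device, rather than the
ace_private, to the ring-refill helpers so they can call
netdev_alloc_skb_ip_align().  That helper subsumes the removed
allocate-then-reserve pair; roughly (a sketch of the equivalence, not
acenic code):

	skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
	if (skb)
		skb_reserve(skb, NET_IP_ALIGN);

Reserving NET_IP_ALIGN (usually 2) bytes ahead of the 14-byte Ethernet
header leaves the IP header word-aligned, and ties the skb to the
receiving device from the start.
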
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 02842d0..38a83ac 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1557,8 +1557,10 @@
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
-			else
+			else {
 				ether_setup(bond_dev);
+				bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+			}
 
 			netdev_bonding_change(bond_dev,
 					      NETDEV_POST_TYPE_CHANGE);
@@ -4330,7 +4332,7 @@
 	bond_dev->tx_queue_len = 0;
 	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
 	bond_dev->priv_flags |= IFF_BONDING;
-	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 
 	/* At first, we block adding VLANs. That's the only way to
 	 * prevent problems that occur when adding VLANs over an
@@ -4691,7 +4693,7 @@
 		/* miimon and arp_interval not set, we need one so things
 		 * work as expected, see bonding.txt for details
 		 */
-		pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
+		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
 	}
 
 	if (primary && !USES_PRIMARY(bond_mode)) {
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index b60835f..2dfb4bf 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1025,6 +1025,7 @@
 	int i;
 	struct slave *slave;
 	struct bonding *bond = to_bond(d);
+	char ifname[IFNAMSIZ];
 
 	if (!rtnl_trylock())
 		return restart_syscall();
@@ -1035,32 +1036,33 @@
 	if (!USES_PRIMARY(bond->params.mode)) {
 		pr_info("%s: Unable to set primary slave; %s is in mode %d\n",
 			bond->dev->name, bond->dev->name, bond->params.mode);
-	} else {
-		bond_for_each_slave(bond, slave, i) {
-			if (strnicmp
-			    (slave->dev->name, buf,
-			     strlen(slave->dev->name)) == 0) {
-				pr_info("%s: Setting %s as primary slave.\n",
-					bond->dev->name, slave->dev->name);
-				bond->primary_slave = slave;
-				strcpy(bond->params.primary, slave->dev->name);
-				bond_select_active_slave(bond);
-				goto out;
-			}
-		}
+		goto out;
+	}
 
-		/* if we got here, then we didn't match the name of any slave */
+	sscanf(buf, "%16s", ifname); /* IFNAMSIZ */
 
-		if (strlen(buf) == 0 || buf[0] == '\n') {
-			pr_info("%s: Setting primary slave to None.\n",
-				bond->dev->name);
-			bond->primary_slave = NULL;
-				bond_select_active_slave(bond);
-		} else {
-			pr_info("%s: Unable to set %.*s as primary slave as it is not a slave.\n",
-				bond->dev->name, (int)strlen(buf) - 1, buf);
+	/* check to see if we are clearing primary */
+	if (!strlen(ifname) || buf[0] == '\n') {
+		pr_info("%s: Setting primary slave to None.\n",
+			bond->dev->name);
+		bond->primary_slave = NULL;
+		bond_select_active_slave(bond);
+		goto out;
+	}
+
+	bond_for_each_slave(bond, slave, i) {
+		if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
+			pr_info("%s: Setting %s as primary slave.\n",
+				bond->dev->name, slave->dev->name);
+			bond->primary_slave = slave;
+			strcpy(bond->params.primary, slave->dev->name);
+			bond_select_active_slave(bond);
+			goto out;
 		}
 	}
+
+	pr_info("%s: Unable to set %.*s as primary slave.\n",
+		bond->dev->name, (int)strlen(buf) - 1, buf);
 out:
 	write_unlock_bh(&bond->curr_slave_lock);
 	read_unlock(&bond->lock);
@@ -1195,6 +1197,7 @@
 	struct slave *old_active = NULL;
 	struct slave *new_active = NULL;
 	struct bonding *bond = to_bond(d);
+	char ifname[IFNAMSIZ];
 
 	if (!rtnl_trylock())
 		return restart_syscall();
@@ -1203,56 +1206,62 @@
 	read_lock(&bond->lock);
 	write_lock_bh(&bond->curr_slave_lock);
 
-	if (!USES_PRIMARY(bond->params.mode))
+	if (!USES_PRIMARY(bond->params.mode)) {
 		pr_info("%s: Unable to change active slave; %s is in mode %d\n",
 			bond->dev->name, bond->dev->name, bond->params.mode);
-	else {
-		bond_for_each_slave(bond, slave, i) {
-			if (strnicmp
-			    (slave->dev->name, buf,
-			     strlen(slave->dev->name)) == 0) {
-        			old_active = bond->curr_active_slave;
-        			new_active = slave;
-        			if (new_active == old_active) {
-					/* do nothing */
-					pr_info("%s: %s is already the current active slave.\n",
+		goto out;
+	}
+
+	sscanf(buf, "%16s", ifname); /* IFNAMSIZ */
+
+	/* check to see if we are clearing active */
+	if (!strlen(ifname) || buf[0] == '\n') {
+		pr_info("%s: Clearing current active slave.\n",
+			bond->dev->name);
+		bond->curr_active_slave = NULL;
+		bond_select_active_slave(bond);
+		goto out;
+	}
+
+	bond_for_each_slave(bond, slave, i) {
+		if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
+			old_active = bond->curr_active_slave;
+			new_active = slave;
+			if (new_active == old_active) {
+				/* do nothing */
+				pr_info("%s: %s is already the current"
+					" active slave.\n",
+					bond->dev->name,
+					slave->dev->name);
+				goto out;
+			} else {
+				if ((new_active) &&
+				    (old_active) &&
+				    (new_active->link == BOND_LINK_UP) &&
+				    IS_UP(new_active->dev)) {
+					pr_info("%s: Setting %s as active"
+						" slave.\n",
 						bond->dev->name,
 						slave->dev->name);
-					goto out;
+					bond_change_active_slave(bond,
+								 new_active);
 				}
 				else {
-        				if ((new_active) &&
-            				    (old_active) &&
-				            (new_active->link == BOND_LINK_UP) &&
-				            IS_UP(new_active->dev)) {
-						pr_info("%s: Setting %s as active slave.\n",
-							bond->dev->name,
-							slave->dev->name);
-							bond_change_active_slave(bond, new_active);
-        				}
-					else {
-						pr_info("%s: Could not set %s as active slave; either %s is down or the link is down.\n",
-							bond->dev->name,
-							slave->dev->name,
-							slave->dev->name);
-					}
-					goto out;
+					pr_info("%s: Could not set %s as"
+						" active slave; either %s is"
+						" down or the link is down.\n",
+						bond->dev->name,
+						slave->dev->name,
+						slave->dev->name);
 				}
+				goto out;
 			}
 		}
-
-		/* if we got here, then we didn't match the name of any slave */
-
-		if (strlen(buf) == 0 || buf[0] == '\n') {
-			pr_info("%s: Setting active slave to None.\n",
-				bond->dev->name);
-			bond->primary_slave = NULL;
-			bond_select_active_slave(bond);
-		} else {
-			pr_info("%s: Unable to set %.*s as active slave as it is not a slave.\n",
-				bond->dev->name, (int)strlen(buf) - 1, buf);
-		}
 	}
+
+	pr_info("%s: Unable to set %.*s as active slave.\n",
+		bond->dev->name, (int)strlen(buf) - 1, buf);
  out:
 	write_unlock_bh(&bond->curr_slave_lock);
 	read_unlock(&bond->lock);
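
Both sysfs stores above now parse the slave name with a bounded sscanf()
and match it against each slave with strncmp() on IFNAMSIZ, instead of
comparing prefixes of the raw sysfs buffer.  One sizing subtlety: with
char ifname[IFNAMSIZ] and IFNAMSIZ == 16, the scan width must be 15,
because sscanf() writes a terminating NUL beyond the field width.  A
defensive sketch of the pattern:

	char ifname[IFNAMSIZ];

	if (sscanf(buf, "%15s", ifname) != 1)	/* IFNAMSIZ - 1 */
		ifname[0] = '\0';		/* treat as "clear" */

Hence "%15s", not "%16s", for a 16-byte buffer.
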
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e64cd9c..e55df30 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2764,7 +2764,14 @@
 			prefetch(skb->data);
 
 			vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
-			if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+
+			/*
+			 * There is a need to check for NETIF_F_HW_VLAN_RX
+			 * here: even if vlan rx accel is disabled,
+			 * NV_RX3_VLAN_TAG_PRESENT is set pseudo-randomly.
+			 */
+			if (dev->features & NETIF_F_HW_VLAN_RX &&
+			    vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
 				u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
 
 				__vlan_hwaccel_put_tag(skb, vid);
@@ -5331,15 +5338,16 @@
 		np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
 		dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG |
 			NETIF_F_TSO | NETIF_F_RXCSUM;
-		dev->features |= dev->hw_features;
 	}
 
 	np->vlanctl_bits = 0;
 	if (id->driver_data & DEV_HAS_VLAN) {
 		np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
-		dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+		dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
 	}
 
+	dev->features |= dev->hw_features;
+
 	np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
 	if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
 	    (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
@@ -5607,6 +5615,8 @@
 		goto out_error;
 	}
 
+	nv_vlan_mode(dev, dev->features);
+
 	netif_carrier_off(dev);
 
 	dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
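
The forcedeth hunks move the offload bits into dev->hw_features before
copying them into dev->features, matching the split the ethtool offload
framework expects: hw_features lists what the user may toggle, features
what is currently enabled.  The registration-time idiom (a sketch under
that assumption):

	dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO |
			    NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX |
			    NETIF_F_HW_VLAN_TX;
	dev->features |= dev->hw_features;	/* enabled by default */

Runtime paths then test dev->features, as the new NETIF_F_HW_VLAN_RX
check in the receive loop does, and nv_vlan_mode(dev, dev->features) is
invoked once in the probe hunk above so the hardware state matches the
feature flags.
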
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 835cd25..2659daa 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -388,12 +388,8 @@
 	if (priv->hwts_rx_en)
 		rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE;
 
-	/* keep vlan related bits if it's enabled */
-	if (ndev->features & NETIF_F_HW_VLAN_TX)
-		rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
-
 	if (ndev->features & NETIF_F_HW_VLAN_RX)
-		tctrl |= TCTRL_VLINS;
+		rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
 
 	/* Init rctrl based on our settings */
 	gfar_write(&regs->rctrl, rctrl);
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 6e82dd3..46b5f5f 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -183,7 +183,7 @@
 
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	random_ether_addr(dev->dev_addr);
 }
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index ba631fc..05172c3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -572,7 +572,7 @@
 {
 	ether_setup(dev);
 
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	dev->priv_flags	       &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->netdev_ops		= &macvlan_netdev_ops;
 	dev->destructor		= free_netdev;
 	dev->header_ops		= &macvlan_hard_header_ops,
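
A pattern that recurs in the bonding, ifb, macvlan, tun, veth, hdlc_fr
and airo hunks of this merge: ether_setup() now tags devices with
IFF_TX_SKB_SHARING, advertising that a caller such as pktgen may
transmit a shared skb through them.  Virtual devices that queue, reuse
or rewrite the skb cannot honour that, so they clear the flag right
after setup:

	ether_setup(dev);
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;

With the flag cleared, shared-skb senders are expected to give such a
device its own private copy instead.
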
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 8035765..dc3fbf6 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -190,6 +190,7 @@
 
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)		((tnapi)->tx_pending / 4)
+#define TG3_TX_BD_DMA_MAX		4096
 
 #define TG3_RAW_IP_ALIGN 2
 
@@ -4824,7 +4825,7 @@
 	txq = netdev_get_tx_queue(tp->dev, index);
 
 	while (sw_idx != hw_idx) {
-		struct ring_info *ri = &tnapi->tx_buffers[sw_idx];
+		struct tg3_tx_ring_info *ri = &tnapi->tx_buffers[sw_idx];
 		struct sk_buff *skb = ri->skb;
 		int i, tx_bug = 0;
 
@@ -4840,6 +4841,12 @@
 
 		ri->skb = NULL;
 
+		while (ri->fragmented) {
+			ri->fragmented = false;
+			sw_idx = NEXT_TX(sw_idx);
+			ri = &tnapi->tx_buffers[sw_idx];
+		}
+
 		sw_idx = NEXT_TX(sw_idx);
 
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -4851,6 +4858,13 @@
 				       dma_unmap_addr(ri, mapping),
 				       skb_shinfo(skb)->frags[i].size,
 				       PCI_DMA_TODEVICE);
+
+			while (ri->fragmented) {
+				ri->fragmented = false;
+				sw_idx = NEXT_TX(sw_idx);
+				ri = &tnapi->tx_buffers[sw_idx];
+			}
+
 			sw_idx = NEXT_TX(sw_idx);
 		}
 
@@ -5901,40 +5915,100 @@
 #endif
 }
 
-static void tg3_set_txd(struct tg3_napi *tnapi, int entry,
-			dma_addr_t mapping, int len, u32 flags,
-			u32 mss_and_is_end)
+static inline void tg3_tx_set_bd(struct tg3_tx_buffer_desc *txbd,
+				 dma_addr_t mapping, u32 len, u32 flags,
+				 u32 mss, u32 vlan)
 {
-	struct tg3_tx_buffer_desc *txd = &tnapi->tx_ring[entry];
-	int is_end = (mss_and_is_end & 0x1);
-	u32 mss = (mss_and_is_end >> 1);
-	u32 vlan_tag = 0;
-
-	if (is_end)
-		flags |= TXD_FLAG_END;
-	if (flags & TXD_FLAG_VLAN) {
-		vlan_tag = flags >> 16;
-		flags &= 0xffff;
-	}
-	vlan_tag |= (mss << TXD_MSS_SHIFT);
-
-	txd->addr_hi = ((u64) mapping >> 32);
-	txd->addr_lo = ((u64) mapping & 0xffffffff);
-	txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
-	txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+	txbd->addr_hi = ((u64) mapping >> 32);
+	txbd->addr_lo = ((u64) mapping & 0xffffffff);
+	txbd->len_flags = (len << TXD_LEN_SHIFT) | (flags & 0x0000ffff);
+	txbd->vlan_tag = (mss << TXD_MSS_SHIFT) | (vlan << TXD_VLAN_TAG_SHIFT);
 }
 
-static void tg3_skb_error_unmap(struct tg3_napi *tnapi,
-				struct sk_buff *skb, int last)
+static bool tg3_tx_frag_set(struct tg3_napi *tnapi, u32 *entry, u32 *budget,
+			    dma_addr_t map, u32 len, u32 flags,
+			    u32 mss, u32 vlan)
+{
+	struct tg3 *tp = tnapi->tp;
+	bool hwbug = false;
+
+	if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8)
+		hwbug = true;
+
+	if (tg3_4g_overflow_test(map, len))
+		hwbug = true;
+
+	if (tg3_40bit_overflow_test(tp, map, len))
+		hwbug = true;
+
+	if (tg3_flag(tp, 4K_FIFO_LIMIT)) {
+		u32 tmp_flag = flags & ~TXD_FLAG_END;
+		while (len > TG3_TX_BD_DMA_MAX) {
+			u32 frag_len = TG3_TX_BD_DMA_MAX;
+			len -= TG3_TX_BD_DMA_MAX;
+
+			if (len) {
+				tnapi->tx_buffers[*entry].fragmented = true;
+				/* Avoid the 8-byte DMA problem */
+				if (len <= 8) {
+					len += TG3_TX_BD_DMA_MAX / 2;
+					frag_len = TG3_TX_BD_DMA_MAX / 2;
+				}
+			} else
+				tmp_flag = flags;
+
+			if (*budget) {
+				tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+					      frag_len, tmp_flag, mss, vlan);
+				(*budget)--;
+				*entry = NEXT_TX(*entry);
+			} else {
+				hwbug = true;
+				break;
+			}
+
+			map += frag_len;
+		}
+
+		if (len) {
+			if (*budget) {
+				tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+					      len, flags, mss, vlan);
+				(*budget)--;
+				*entry = NEXT_TX(*entry);
+			} else {
+				hwbug = true;
+			}
+		}
+	} else {
+		tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+			      len, flags, mss, vlan);
+		*entry = NEXT_TX(*entry);
+	}
+
+	return hwbug;
+}
+
+static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
 {
 	int i;
-	u32 entry = tnapi->tx_prod;
-	struct ring_info *txb = &tnapi->tx_buffers[entry];
+	struct sk_buff *skb;
+	struct tg3_tx_ring_info *txb = &tnapi->tx_buffers[entry];
+
+	skb = txb->skb;
+	txb->skb = NULL;
 
 	pci_unmap_single(tnapi->tp->pdev,
 			 dma_unmap_addr(txb, mapping),
 			 skb_headlen(skb),
 			 PCI_DMA_TODEVICE);
+
+	while (txb->fragmented) {
+		txb->fragmented = false;
+		entry = NEXT_TX(entry);
+		txb = &tnapi->tx_buffers[entry];
+	}
+
 	for (i = 0; i < last; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
@@ -5944,18 +6018,24 @@
 		pci_unmap_page(tnapi->tp->pdev,
 			       dma_unmap_addr(txb, mapping),
 			       frag->size, PCI_DMA_TODEVICE);
+
+		while (txb->fragmented) {
+			txb->fragmented = false;
+			entry = NEXT_TX(entry);
+			txb = &tnapi->tx_buffers[entry];
+		}
 	}
 }
 
 /* Workaround 4GB and 40-bit hardware DMA bugs. */
 static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
 				       struct sk_buff *skb,
-				       u32 base_flags, u32 mss)
+				       u32 *entry, u32 *budget,
+				       u32 base_flags, u32 mss, u32 vlan)
 {
 	struct tg3 *tp = tnapi->tp;
 	struct sk_buff *new_skb;
 	dma_addr_t new_addr = 0;
-	u32 entry = tnapi->tx_prod;
 	int ret = 0;
 
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
@@ -5976,24 +6056,22 @@
 					  PCI_DMA_TODEVICE);
 		/* Make sure the mapping succeeded */
 		if (pci_dma_mapping_error(tp->pdev, new_addr)) {
-			ret = -1;
 			dev_kfree_skb(new_skb);
-
-		/* Make sure new skb does not cross any 4G boundaries.
-		 * Drop the packet if it does.
-		 */
-		} else if (tg3_4g_overflow_test(new_addr, new_skb->len)) {
-			pci_unmap_single(tp->pdev, new_addr, new_skb->len,
-					 PCI_DMA_TODEVICE);
 			ret = -1;
-			dev_kfree_skb(new_skb);
 		} else {
-			tnapi->tx_buffers[entry].skb = new_skb;
-			dma_unmap_addr_set(&tnapi->tx_buffers[entry],
+			base_flags |= TXD_FLAG_END;
+
+			tnapi->tx_buffers[*entry].skb = new_skb;
+			dma_unmap_addr_set(&tnapi->tx_buffers[*entry],
 					   mapping, new_addr);
 
-			tg3_set_txd(tnapi, entry, new_addr, new_skb->len,
-				    base_flags, 1 | (mss << 1));
+			if (tg3_tx_frag_set(tnapi, entry, budget, new_addr,
+					    new_skb->len, base_flags,
+					    mss, vlan)) {
+				tg3_tx_skb_unmap(tnapi, *entry, 0);
+				dev_kfree_skb(new_skb);
+				ret = -1;
+			}
 		}
 	}
 
@@ -6051,7 +6129,8 @@
 static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tg3 *tp = netdev_priv(dev);
-	u32 len, entry, base_flags, mss;
+	u32 len, entry, base_flags, mss, vlan = 0;
+	u32 budget;
 	int i = -1, would_hit_hwbug;
 	dma_addr_t mapping;
 	struct tg3_napi *tnapi;
@@ -6063,12 +6142,14 @@
 	if (tg3_flag(tp, ENABLE_TSS))
 		tnapi++;
 
+	budget = tg3_tx_avail(tnapi);
+
 	/* We are running in BH disabled context with netif_tx_lock
 	 * and TX reclaim runs via tp->napi.poll inside of a software
 	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 	 * no IRQ context deadlocks to worry about either.  Rejoice!
 	 */
-	if (unlikely(tg3_tx_avail(tnapi) <= (skb_shinfo(skb)->nr_frags + 1))) {
+	if (unlikely(budget <= (skb_shinfo(skb)->nr_frags + 1))) {
 		if (!netif_tx_queue_stopped(txq)) {
 			netif_tx_stop_queue(txq);
 
@@ -6153,9 +6234,12 @@
 		}
 	}
 
-	if (vlan_tx_tag_present(skb))
-		base_flags |= (TXD_FLAG_VLAN |
-			       (vlan_tx_tag_get(skb) << 16));
+#ifdef BCM_KERNEL_SUPPORTS_8021Q
+	if (vlan_tx_tag_present(skb)) {
+		base_flags |= TXD_FLAG_VLAN;
+		vlan = vlan_tx_tag_get(skb);
+	}
+#endif
 
 	if (tg3_flag(tp, USE_JUMBO_BDFLAG) &&
 	    !mss && skb->len > VLAN_ETH_FRAME_LEN)
@@ -6174,25 +6258,23 @@
 
 	would_hit_hwbug = 0;
 
-	if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8)
-		would_hit_hwbug = 1;
-
-	if (tg3_4g_overflow_test(mapping, len))
-		would_hit_hwbug = 1;
-
-	if (tg3_40bit_overflow_test(tp, mapping, len))
-		would_hit_hwbug = 1;
-
 	if (tg3_flag(tp, 5701_DMA_BUG))
 		would_hit_hwbug = 1;
 
-	tg3_set_txd(tnapi, entry, mapping, len, base_flags,
-		    (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
-
-	entry = NEXT_TX(entry);
+	if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, len, base_flags |
+			  ((skb_shinfo(skb)->nr_frags == 0) ? TXD_FLAG_END : 0),
+			    mss, vlan))
+		would_hit_hwbug = 1;
 
 	/* Now loop through additional data fragments, and queue them. */
 	if (skb_shinfo(skb)->nr_frags > 0) {
+		u32 tmp_mss = mss;
+
+		if (!tg3_flag(tp, HW_TSO_1) &&
+		    !tg3_flag(tp, HW_TSO_2) &&
+		    !tg3_flag(tp, HW_TSO_3))
+			tmp_mss = 0;
+
 		last = skb_shinfo(skb)->nr_frags - 1;
 		for (i = 0; i <= last; i++) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -6209,39 +6291,25 @@
 			if (pci_dma_mapping_error(tp->pdev, mapping))
 				goto dma_error;
 
-			if (tg3_flag(tp, SHORT_DMA_BUG) &&
-			    len <= 8)
+			if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping,
+					    len, base_flags |
+					    ((i == last) ? TXD_FLAG_END : 0),
+					    tmp_mss, vlan))
 				would_hit_hwbug = 1;
-
-			if (tg3_4g_overflow_test(mapping, len))
-				would_hit_hwbug = 1;
-
-			if (tg3_40bit_overflow_test(tp, mapping, len))
-				would_hit_hwbug = 1;
-
-			if (tg3_flag(tp, HW_TSO_1) ||
-			    tg3_flag(tp, HW_TSO_2) ||
-			    tg3_flag(tp, HW_TSO_3))
-				tg3_set_txd(tnapi, entry, mapping, len,
-					    base_flags, (i == last)|(mss << 1));
-			else
-				tg3_set_txd(tnapi, entry, mapping, len,
-					    base_flags, (i == last));
-
-			entry = NEXT_TX(entry);
 		}
 	}
 
 	if (would_hit_hwbug) {
-		tg3_skb_error_unmap(tnapi, skb, i);
+		tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
 
 		/* If the workaround fails due to memory/mapping
 		 * failure, silently drop this packet.
 		 */
-		if (tigon3_dma_hwbug_workaround(tnapi, skb, base_flags, mss))
+		entry = tnapi->tx_prod;
+		budget = tg3_tx_avail(tnapi);
+		if (tigon3_dma_hwbug_workaround(tnapi, skb, &entry, &budget,
+						base_flags, mss, vlan))
 			goto out_unlock;
-
-		entry = NEXT_TX(tnapi->tx_prod);
 	}
 
 	skb_tx_timestamp(skb);
@@ -6269,7 +6337,7 @@
 	return NETDEV_TX_OK;
 
 dma_error:
-	tg3_skb_error_unmap(tnapi, skb, i);
+	tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
 	dev_kfree_skb(skb);
 	tnapi->tx_buffers[tnapi->tx_prod].skb = NULL;
 	return NETDEV_TX_OK;
@@ -6602,35 +6670,13 @@
 		if (!tnapi->tx_buffers)
 			continue;
 
-		for (i = 0; i < TG3_TX_RING_SIZE; ) {
-			struct ring_info *txp;
-			struct sk_buff *skb;
-			unsigned int k;
+		for (i = 0; i < TG3_TX_RING_SIZE; i++) {
+			struct sk_buff *skb = tnapi->tx_buffers[i].skb;
 
-			txp = &tnapi->tx_buffers[i];
-			skb = txp->skb;
-
-			if (skb == NULL) {
-				i++;
+			if (!skb)
 				continue;
-			}
 
-			pci_unmap_single(tp->pdev,
-					 dma_unmap_addr(txp, mapping),
-					 skb_headlen(skb),
-					 PCI_DMA_TODEVICE);
-			txp->skb = NULL;
-
-			i++;
-
-			for (k = 0; k < skb_shinfo(skb)->nr_frags; k++) {
-				txp = &tnapi->tx_buffers[i & (TG3_TX_RING_SIZE - 1)];
-				pci_unmap_page(tp->pdev,
-					       dma_unmap_addr(txp, mapping),
-					       skb_shinfo(skb)->frags[k].size,
-					       PCI_DMA_TODEVICE);
-				i++;
-			}
+			tg3_tx_skb_unmap(tnapi, i, skb_shinfo(skb)->nr_frags);
 
 			dev_kfree_skb_any(skb);
 		}
@@ -6762,9 +6808,9 @@
 		 */
 		if ((!i && !tg3_flag(tp, ENABLE_TSS)) ||
 		    (i && tg3_flag(tp, ENABLE_TSS))) {
-			tnapi->tx_buffers = kzalloc(sizeof(struct ring_info) *
-						    TG3_TX_RING_SIZE,
-						    GFP_KERNEL);
+			tnapi->tx_buffers = kzalloc(
+					       sizeof(struct tg3_tx_ring_info) *
+					       TG3_TX_RING_SIZE, GFP_KERNEL);
 			if (!tnapi->tx_buffers)
 				goto err_out;
 
@@ -8360,7 +8406,7 @@
 	/* Program the jumbo buffer descriptor ring control
 	 * blocks on those devices that have them.
 	 */
-	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
+	if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0 ||
 	    (tg3_flag(tp, JUMBO_CAPABLE) && !tg3_flag(tp, 5780_CLASS))) {
 
 		if (tg3_flag(tp, JUMBO_RING_ENABLE)) {
@@ -11204,6 +11250,7 @@
 {
 	u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key;
 	u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
+	u32 budget;
 	struct sk_buff *skb, *rx_skb;
 	u8 *tx_data;
 	dma_addr_t map;
@@ -11363,6 +11410,10 @@
 		return -EIO;
 	}
 
+	val = tnapi->tx_prod;
+	tnapi->tx_buffers[val].skb = skb;
+	dma_unmap_addr_set(&tnapi->tx_buffers[val], mapping, map);
+
 	tw32_f(HOSTCC_MODE, tp->coalesce_mode | HOSTCC_MODE_ENABLE |
 	       rnapi->coal_now);
 
@@ -11370,8 +11421,13 @@
 
 	rx_start_idx = rnapi->hw_status->idx[0].rx_producer;
 
-	tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len,
-		    base_flags, (mss << 1) | 1);
+	budget = tg3_tx_avail(tnapi);
+	if (tg3_tx_frag_set(tnapi, &val, &budget, map, tx_len,
+			    base_flags | TXD_FLAG_END, mss, 0)) {
+		tnapi->tx_buffers[val].skb = NULL;
+		dev_kfree_skb(skb);
+		return -EIO;
+	}
 
 	tnapi->tx_prod++;
 
@@ -11394,7 +11450,7 @@
 			break;
 	}
 
-	pci_unmap_single(tp->pdev, map, tx_len, PCI_DMA_TODEVICE);
+	tg3_tx_skb_unmap(tnapi, tnapi->tx_prod - 1, 0);
 	dev_kfree_skb(skb);
 
 	if (tx_idx != tnapi->tx_prod)
@@ -13817,7 +13873,7 @@
 		tg3_flag_set(tp, 5705_PLUS);
 
 	/* Determine TSO capabilities */
-	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+	if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0)
 		; /* Do nothing. HW bug. */
 	else if (tg3_flag(tp, 57765_PLUS))
 		tg3_flag_set(tp, HW_TSO_3);
@@ -13880,11 +13936,14 @@
 	if (tg3_flag(tp, 5755_PLUS))
 		tg3_flag_set(tp, SHORT_DMA_BUG);
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+		tg3_flag_set(tp, 4K_FIFO_LIMIT);
+
 	if (tg3_flag(tp, 5717_PLUS))
 		tg3_flag_set(tp, LRG_PROD_RING_CAP);
 
 	if (tg3_flag(tp, 57765_PLUS) &&
-	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5719)
+	    tp->pci_chip_rev_id != CHIPREV_ID_5719_A0)
 		tg3_flag_set(tp, USE_JUMBO_BDFLAG);
 
 	if (!tg3_flag(tp, 5705_PLUS) ||
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 691539b..2ea456d 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2652,6 +2652,12 @@
 	DEFINE_DMA_UNMAP_ADDR(mapping);
 };
 
+struct tg3_tx_ring_info {
+	struct sk_buff			*skb;
+	DEFINE_DMA_UNMAP_ADDR(mapping);
+	bool				fragmented;
+};
+
 struct tg3_link_config {
 	/* Describes what we're trying to get. */
 	u32				advertising;
@@ -2816,7 +2822,7 @@
 	u32				last_tx_cons;
 	u32				prodmbox;
 	struct tg3_tx_buffer_desc	*tx_ring;
-	struct ring_info		*tx_buffers;
+	struct tg3_tx_ring_info		*tx_buffers;
 
 	dma_addr_t			status_mapping;
 	dma_addr_t			rx_rcb_mapping;
@@ -2899,6 +2905,7 @@
 	TG3_FLAG_57765_PLUS,
 	TG3_FLAG_APE_HAS_NCSI,
 	TG3_FLAG_5717_PLUS,
+	TG3_FLAG_4K_FIFO_LIMIT,
 
 	/* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */
 	TG3_FLAG_NUMBER_OF_FLAGS,	/* Last entry in enum TG3_FLAGS */
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9a6b382..71f3d1a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -528,6 +528,7 @@
 		dev->netdev_ops = &tap_netdev_ops;
 		/* Ethernet TAP Device */
 		ether_setup(dev);
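+		/* ether_setup() sets IFF_TX_SKB_SHARING, but a tap device
+		 * may modify transmitted skbs, so shared skbs (e.g. from
+		 * pktgen) are not safe here.
+		 */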
+		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
 		random_ether_addr(dev->dev_addr);
 
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 5250288..c5c4b4d 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -314,12 +314,11 @@
 	skb_pull(skb, 4);
 
 	while (skb->len > 0) {
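+		/* Each rx chunk starts with a 32-bit header: the packet
+		 * length lives in the low 11 bits, and the one's
+		 * complement of that length sits in the same bits of the
+		 * high word as a sanity check.
+		 */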
-		if ((short)(header & 0x0000ffff) !=
-		    ~((short)((header & 0xffff0000) >> 16))) {
+		if ((header & 0x07ff) != ((~header >> 16) & 0x07ff))
 			netdev_err(dev->net, "asix_rx_fixup() Bad Header Length\n");
-		}
+
 		/* get the packet length */
-		size = (u16) (header & 0x0000ffff);
+		size = (u16) (header & 0x000007ff);
 
 		if ((skb->len) - ((size + 1) & 0xfffe) == 0) {
 			u8 alignment = (unsigned long)skb->data & 0x3;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 7f78db7..5b23767 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -263,6 +263,8 @@
 {
 	ether_setup(dev);
 
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+
 	dev->netdev_ops = &veth_netdev_ops;
 	dev->ethtool_ops = &veth_ethtool_ops;
 	dev->features |= NETIF_F_LLTX;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index b25c922..eb20281 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1074,9 +1074,10 @@
 
 	used = pvc_is_used(pvc);
 
-	if (type == ARPHRD_ETHER)
+	if (type == ARPHRD_ETHER) {
 		dev = alloc_netdev(0, "pvceth%d", ether_setup);
-	else
+		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	} else
 		dev = alloc_netdev(0, "pvc%d", pvc_setup);
 
 	if (!dev) {
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 55cf71f..e1b3e3c 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -2823,6 +2823,7 @@
 	dev->wireless_data = &ai->wireless_data;
 	dev->irq = irq;
 	dev->base_addr = port;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
 	SET_NETDEV_DEV(dev, dmdev);
 
diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index d2293dc..3cab843 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig
@@ -28,7 +28,7 @@
 
 config B43_BCMA
 	bool "Support for BCMA bus"
-	depends on B43 && BCMA && BROKEN
+	depends on B43 && BCMA
 	default y
 
 config B43_SSB
diff --git a/drivers/net/wireless/b43/bus.c b/drivers/net/wireless/b43/bus.c
index 64c3f65..05f6c7b 100644
--- a/drivers/net/wireless/b43/bus.c
+++ b/drivers/net/wireless/b43/bus.c
@@ -244,10 +244,12 @@
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
 		bcma_set_drvdata(dev->bdev, wldev);
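+		/* Without a break, a kernel with both CONFIG_B43_BCMA and
+		 * CONFIG_B43_SSB enabled would fall through and clobber
+		 * the SSB drvdata as well.
+		 */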
+		break;
 #endif
 #ifdef CONFIG_B43_SSB
 	case B43_BUS_SSB:
 		ssb_set_drvdata(dev->sdev, wldev);
+		break;
 #endif
 	}
 }
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 032d466..26f1ab8 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -5350,6 +5350,7 @@
 {
 	struct b43_wl *wl = ssb_get_devtypedata(sdev);
 	struct b43_wldev *wldev = ssb_get_drvdata(sdev);
+	struct b43_bus_dev *dev = wldev->dev;
 
 	/* We must cancel any work here before unregistering from ieee80211,
 	 * as the ieee80211 unreg will destroy the workqueue. */
@@ -5365,14 +5366,14 @@
 		ieee80211_unregister_hw(wl->hw);
 	}
 
-	b43_one_core_detach(wldev->dev);
+	b43_one_core_detach(dev);
 
 	if (list_empty(&wl->devlist)) {
 		b43_leds_unregister(wl);
 		/* Last core on the chip unregistered.
 		 * We can destroy common struct b43_wl.
 		 */
-		b43_wireless_exit(wldev->dev, wl);
+		b43_wireless_exit(dev, wl);
 	}
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index d508482..89a116f 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -855,6 +855,7 @@
 
 	iface = netdev_priv(dev);
 	ether_setup(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
 	/* kernel callbacks */
 	if (iface) {
diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c
index 0372315..c77e054 100644
--- a/drivers/nfc/pn533.c
+++ b/drivers/nfc/pn533.c
@@ -1596,7 +1596,7 @@
 	usb_free_urb(dev->out_urb);
 	kfree(dev);
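+	/* 'dev' was just freed, so the disconnect message below must use
+	 * the local 'interface' pointer rather than dev->interface.
+	 */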
 
-	nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected");
+	nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected");
 }
 
 static struct usb_driver pn533_driver = {
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 77cb2a1..81525ae 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -55,7 +55,7 @@
 	SMBIOS_ATTR_INSTANCE_SHOW,
 };
 
-static mode_t
+static size_t
 find_smbios_instance_string(struct pci_dev *pdev, char *buf,
 			    enum smbios_attr_enum attribute)
 {
diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 081c171..5ce5170 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h
@@ -397,7 +397,7 @@
 };
 
 struct be_cmd_bhs {
-	struct iscsi_cmd iscsi_hdr;
+	struct iscsi_scsi_req iscsi_hdr;
 	unsigned char pad1[16];
 	struct pdu_data_out iscsi_data_pdu;
 	unsigned char pad2[BE_SENSE_INFO_SIZE -
@@ -428,7 +428,7 @@
 };
 
 struct be_status_bhs {
-	struct iscsi_cmd iscsi_hdr;
+	struct iscsi_scsi_req iscsi_hdr;
 	unsigned char pad1[16];
 	/**
 	 * The plus 2 below is to hold the sense info length that gets
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 030a96c..9ae80cd 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -332,11 +332,11 @@
 {
 	struct bnx2i_cmd *bnx2i_cmd;
 	struct bnx2i_login_request *login_wqe;
-	struct iscsi_login *login_hdr;
+	struct iscsi_login_req *login_hdr;
 	u32 dword;
 
 	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
-	login_hdr = (struct iscsi_login *)task->hdr;
+	login_hdr = (struct iscsi_login_req *)task->hdr;
 	login_wqe = (struct bnx2i_login_request *)
 						bnx2i_conn->ep->qp.sq_prod_qe;
 
@@ -1349,7 +1349,7 @@
 	struct bnx2i_cmd_response *resp_cqe;
 	struct bnx2i_cmd *bnx2i_cmd;
 	struct iscsi_task *task;
-	struct iscsi_cmd_rsp *hdr;
+	struct iscsi_scsi_rsp *hdr;
 	u32 datalen = 0;
 
 	resp_cqe = (struct bnx2i_cmd_response *)cqe;
@@ -1376,7 +1376,7 @@
 	}
 	bnx2i_iscsi_unmap_sg_list(bnx2i_cmd);
 
-	hdr = (struct iscsi_cmd_rsp *)task->hdr;
+	hdr = (struct iscsi_scsi_rsp *)task->hdr;
 	resp_cqe = (struct bnx2i_cmd_response *)cqe;
 	hdr->opcode = resp_cqe->op_code;
 	hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn);
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index 5c55a75..cffd4d7 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -1213,7 +1213,7 @@
 	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
 	struct scsi_cmnd *sc = task->sc;
 	struct bnx2i_cmd *cmd = task->dd_data;
-	struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr;
+	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 
 	if (atomic_read(&bnx2i_conn->ep->num_active_cmds) + 1  >
 	    hba->max_sqes)
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index d7a4120..256a999 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -84,22 +84,6 @@
 					     __func__, ##arg);		\
 	} while (0);
 
-/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
-#define SNA32_CHECK 2147483648UL
-
-static int iscsi_sna_lt(u32 n1, u32 n2)
-{
-	return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
-			    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
-}
-
-/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
-static int iscsi_sna_lte(u32 n1, u32 n2)
-{
-	return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
-			    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
-}
-
 inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
 {
 	struct Scsi_Host *shost = conn->session->host;
@@ -360,7 +344,7 @@
 	struct iscsi_conn *conn = task->conn;
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
-	struct iscsi_cmd *hdr;
+	struct iscsi_scsi_req *hdr;
 	unsigned hdrlength, cmd_len;
 	itt_t itt;
 	int rc;
@@ -374,7 +358,7 @@
 		if (rc)
 			return rc;
 	}
-	hdr = (struct iscsi_cmd *) task->hdr;
+	hdr = (struct iscsi_scsi_req *)task->hdr;
 	itt = hdr->itt;
 	memset(hdr, 0, sizeof(*hdr));
 
@@ -830,7 +814,7 @@
 			       struct iscsi_task *task, char *data,
 			       int datalen)
 {
-	struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
+	struct iscsi_scsi_rsp *rhdr = (struct iscsi_scsi_rsp *)hdr;
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 
diff --git a/drivers/staging/ath6kl/os/linux/ar6000_drv.c b/drivers/staging/ath6kl/os/linux/ar6000_drv.c
index 499b7a9..32ee39a 100644
--- a/drivers/staging/ath6kl/os/linux/ar6000_drv.c
+++ b/drivers/staging/ath6kl/os/linux/ar6000_drv.c
@@ -6205,6 +6205,7 @@
     
     ether_setup(dev);
     init_netdev(dev, ap_ifname);
+    dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
     if (register_netdev(dev)) {
         AR_DEBUG_PRINTF(ATH_DEBUG_ERR,("ar6000_create_ap_interface: register_netdev failed\n"));
diff --git a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h
index 5711e7c..40e3d37 100644
--- a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h
+++ b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h
@@ -24,8 +24,6 @@
 #define BRCMS_SET_SHORTSLOT_OVERRIDE		146
 
 
-#include <linux/interrupt.h>
-
 /* BMAC Note: High-only driver is no longer working in softirq context as it needs to block and
  * sleep so perimeter lock has to be a semaphore instead of spinlock. This requires timers to be
  * submitted to workqueue instead of being on kernel timer
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index 5cb0f0e..b28794b 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,5 +31,6 @@
 
 source "drivers/target/loopback/Kconfig"
 source "drivers/target/tcm_fc/Kconfig"
+source "drivers/target/iscsi/Kconfig"
 
 endif
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 21df808..1060c7b 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -24,5 +24,5 @@
 
 # Fabric modules
 obj-$(CONFIG_LOOPBACK_TARGET)	+= loopback/
-
 obj-$(CONFIG_TCM_FC)		+= tcm_fc/
+obj-$(CONFIG_ISCSI_TARGET)	+= iscsi/
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
new file mode 100644
index 0000000..564ff4e
--- /dev/null
+++ b/drivers/target/iscsi/Kconfig
@@ -0,0 +1,8 @@
+config ISCSI_TARGET
+	tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+	select CRYPTO
+	select CRYPTO_CRC32C
+	select CRYPTO_CRC32C_INTEL if X86
+	help
+	  Say M here to enable the ConfigFS enabled Linux-iSCSI.org iSCSI
+	  Target Mode Stack.
diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile
new file mode 100644
index 0000000..5b9a2cf
--- /dev/null
+++ b/drivers/target/iscsi/Makefile
@@ -0,0 +1,20 @@
+iscsi_target_mod-y +=		iscsi_target_parameters.o \
+				iscsi_target_seq_pdu_list.o \
+				iscsi_target_tq.o \
+				iscsi_target_auth.o \
+				iscsi_target_datain_values.o \
+				iscsi_target_device.o \
+				iscsi_target_erl0.o \
+				iscsi_target_erl1.o \
+				iscsi_target_erl2.o \
+				iscsi_target_login.o \
+				iscsi_target_nego.o \
+				iscsi_target_nodeattrib.o \
+				iscsi_target_tmr.o \
+				iscsi_target_tpg.o \
+				iscsi_target_util.o \
+				iscsi_target.o \
+				iscsi_target_configfs.o \
+				iscsi_target_stat.o
+
+obj-$(CONFIG_ISCSI_TARGET)	+= iscsi_target_mod.o
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
new file mode 100644
index 0000000..14c81c4
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -0,0 +1,4559 @@
+/*******************************************************************************
+ * This file contains main functions related to the iSCSI Target Core Driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <linux/completion.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi_device.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_configfs.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_tmr.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_stat.h"
+
+static LIST_HEAD(g_tiqn_list);
+static LIST_HEAD(g_np_list);
+static DEFINE_SPINLOCK(tiqn_lock);
+static DEFINE_SPINLOCK(np_lock);
+
+static struct idr tiqn_idr;
+struct idr sess_idr;
+struct mutex auth_id_lock;
+spinlock_t sess_idr_lock;
+
+struct iscsit_global *iscsit_global;
+
+struct kmem_cache *lio_cmd_cache;
+struct kmem_cache *lio_qr_cache;
+struct kmem_cache *lio_dr_cache;
+struct kmem_cache *lio_ooo_cache;
+struct kmem_cache *lio_r2t_cache;
+
+static int iscsit_handle_immediate_data(struct iscsi_cmd *,
+			unsigned char *buf, u32);
+static int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+
+struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *buf)
+{
+	struct iscsi_tiqn *tiqn = NULL;
+
+	spin_lock(&tiqn_lock);
+	list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+		if (!strcmp(tiqn->tiqn, buf)) {
+
+			spin_lock(&tiqn->tiqn_state_lock);
+			if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) {
+				tiqn->tiqn_access_count++;
+				spin_unlock(&tiqn->tiqn_state_lock);
+				spin_unlock(&tiqn_lock);
+				return tiqn;
+			}
+			spin_unlock(&tiqn->tiqn_state_lock);
+		}
+	}
+	spin_unlock(&tiqn_lock);
+
+	return NULL;
+}
+
+static int iscsit_set_tiqn_shutdown(struct iscsi_tiqn *tiqn)
+{
+	spin_lock(&tiqn->tiqn_state_lock);
+	if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) {
+		tiqn->tiqn_state = TIQN_STATE_SHUTDOWN;
+		spin_unlock(&tiqn->tiqn_state_lock);
+		return 0;
+	}
+	spin_unlock(&tiqn->tiqn_state_lock);
+
+	return -1;
+}
+
+void iscsit_put_tiqn_for_login(struct iscsi_tiqn *tiqn)
+{
+	spin_lock(&tiqn->tiqn_state_lock);
+	tiqn->tiqn_access_count--;
+	spin_unlock(&tiqn->tiqn_state_lock);
+}
+
+/*
+ * Note that IQN formatting is expected to be done in userspace, and
+ * no explicit IQN format checks are done here.
+ */
+struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
+{
+	struct iscsi_tiqn *tiqn = NULL;
+	int ret;
+
+	if (strlen(buf) > ISCSI_IQN_LEN) {
+		pr_err("Target IQN exceeds %d bytes\n",
+				ISCSI_IQN_LEN);
+		return ERR_PTR(-EINVAL);
+	}
+
+	tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL);
+	if (!tiqn) {
+		pr_err("Unable to allocate struct iscsi_tiqn\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	sprintf(tiqn->tiqn, "%s", buf);
+	INIT_LIST_HEAD(&tiqn->tiqn_list);
+	INIT_LIST_HEAD(&tiqn->tiqn_tpg_list);
+	spin_lock_init(&tiqn->tiqn_state_lock);
+	spin_lock_init(&tiqn->tiqn_tpg_lock);
+	spin_lock_init(&tiqn->sess_err_stats.lock);
+	spin_lock_init(&tiqn->login_stats.lock);
+	spin_lock_init(&tiqn->logout_stats.lock);
+
+	if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) {
+		pr_err("idr_pre_get() for tiqn_idr failed\n");
+		kfree(tiqn);
+		return ERR_PTR(-ENOMEM);
+	}
+	tiqn->tiqn_state = TIQN_STATE_ACTIVE;
+
+	spin_lock(&tiqn_lock);
+	ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index);
+	if (ret < 0) {
+		pr_err("idr_get_new() failed for tiqn->tiqn_index\n");
+		spin_unlock(&tiqn_lock);
+		kfree(tiqn);
+		return ERR_PTR(ret);
+	}
+	list_add_tail(&tiqn->tiqn_list, &g_tiqn_list);
+	spin_unlock(&tiqn_lock);
+
+	pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn);
+
+	return tiqn;
+
+}
+
+static void iscsit_wait_for_tiqn(struct iscsi_tiqn *tiqn)
+{
+	/*
+	 * Wait for accesses to said struct iscsi_tiqn to end.
+	 */
+	spin_lock(&tiqn->tiqn_state_lock);
+	while (tiqn->tiqn_access_count != 0) {
+		spin_unlock(&tiqn->tiqn_state_lock);
+		msleep(10);
+		spin_lock(&tiqn->tiqn_state_lock);
+	}
+	spin_unlock(&tiqn->tiqn_state_lock);
+}
+
+void iscsit_del_tiqn(struct iscsi_tiqn *tiqn)
+{
+	/*
+	 * iscsit_set_tiqn_shutdown sets tiqn->tiqn_state = TIQN_STATE_SHUTDOWN
+	 * while holding tiqn->tiqn_state_lock.  This means that all subsequent
+	 * attempts to access this struct iscsi_tiqn will fail from both transport
+	 * fabric and control code paths.
+	 */
+	if (iscsit_set_tiqn_shutdown(tiqn) < 0) {
+		pr_err("iscsit_set_tiqn_shutdown() failed\n");
+		return;
+	}
+
+	iscsit_wait_for_tiqn(tiqn);
+
+	spin_lock(&tiqn_lock);
+	list_del(&tiqn->tiqn_list);
+	idr_remove(&tiqn_idr, tiqn->tiqn_index);
+	spin_unlock(&tiqn_lock);
+
+	pr_debug("CORE[0] - Deleted iSCSI Target IQN: %s\n",
+			tiqn->tiqn);
+	kfree(tiqn);
+}
+
+int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+{
+	int ret;
+	/*
+	 * Determine if the network portal is accepting storage traffic.
+	 */
+	spin_lock_bh(&np->np_thread_lock);
+	if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+		spin_unlock_bh(&np->np_thread_lock);
+		return -1;
+	}
+	if (np->np_login_tpg) {
+		pr_err("np->np_login_tpg is not NULL!\n");
+		spin_unlock_bh(&np->np_thread_lock);
+		return -1;
+	}
+	spin_unlock_bh(&np->np_thread_lock);
+	/*
+	 * Determine if the portal group is accepting storage traffic.
+	 */
+	spin_lock_bh(&tpg->tpg_state_lock);
+	if (tpg->tpg_state != TPG_STATE_ACTIVE) {
+		spin_unlock_bh(&tpg->tpg_state_lock);
+		return -1;
+	}
+	spin_unlock_bh(&tpg->tpg_state_lock);
+
+	/*
+	 * Here we serialize access across the TIQN+TPG Tuple.
+	 */
+	ret = mutex_lock_interruptible(&tpg->np_login_lock);
+	if ((ret != 0) || signal_pending(current))
+		return -1;
+
+	spin_lock_bh(&np->np_thread_lock);
+	np->np_login_tpg = tpg;
+	spin_unlock_bh(&np->np_thread_lock);
+
+	return 0;
+}
+
+int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+{
+	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+	spin_lock_bh(&np->np_thread_lock);
+	np->np_login_tpg = NULL;
+	spin_unlock_bh(&np->np_thread_lock);
+
+	mutex_unlock(&tpg->np_login_lock);
+
+	if (tiqn)
+		iscsit_put_tiqn_for_login(tiqn);
+
+	return 0;
+}
+
+static struct iscsi_np *iscsit_get_np(
+	struct __kernel_sockaddr_storage *sockaddr,
+	int network_transport)
+{
+	struct sockaddr_in *sock_in, *sock_in_e;
+	struct sockaddr_in6 *sock_in6, *sock_in6_e;
+	struct iscsi_np *np;
+	int ip_match = 0;
+	u16 port;
+
+	spin_lock_bh(&np_lock);
+	list_for_each_entry(np, &g_np_list, np_list) {
+		spin_lock(&np->np_thread_lock);
+		if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+			spin_unlock(&np->np_thread_lock);
+			continue;
+		}
+
+		if (sockaddr->ss_family == AF_INET6) {
+			sock_in6 = (struct sockaddr_in6 *)sockaddr;
+			sock_in6_e = (struct sockaddr_in6 *)&np->np_sockaddr;
+
+			if (!memcmp((void *)&sock_in6->sin6_addr.in6_u,
+				    (void *)&sock_in6_e->sin6_addr.in6_u,
+				    sizeof(struct in6_addr)))
+				ip_match = 1;
+
+			port = ntohs(sock_in6->sin6_port);
+		} else {
+			sock_in = (struct sockaddr_in *)sockaddr;
+			sock_in_e = (struct sockaddr_in *)&np->np_sockaddr;
+
+			if (sock_in->sin_addr.s_addr ==
+			    sock_in_e->sin_addr.s_addr)
+				ip_match = 1;
+
+			port = ntohs(sock_in->sin_port);
+		}
+
+		if ((ip_match == 1) && (np->np_port == port) &&
+		    (np->np_network_transport == network_transport)) {
+			/*
+			 * Increment the np_exports reference count now to
+			 * prevent iscsit_del_np() below from being called
+			 * while iscsi_tpg_add_network_portal() is called.
+			 */
+			np->np_exports++;
+			spin_unlock(&np->np_thread_lock);
+			spin_unlock_bh(&np_lock);
+			return np;
+		}
+		spin_unlock(&np->np_thread_lock);
+	}
+	spin_unlock_bh(&np_lock);
+
+	return NULL;
+}
+
+struct iscsi_np *iscsit_add_np(
+	struct __kernel_sockaddr_storage *sockaddr,
+	char *ip_str,
+	int network_transport)
+{
+	struct sockaddr_in *sock_in;
+	struct sockaddr_in6 *sock_in6;
+	struct iscsi_np *np;
+	int ret;
+	/*
+	 * Locate the existing struct iscsi_np if already active..
+	 */
+	np = iscsit_get_np(sockaddr, network_transport);
+	if (np)
+		return np;
+
+	np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL);
+	if (!np) {
+		pr_err("Unable to allocate memory for struct iscsi_np\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	np->np_flags |= NPF_IP_NETWORK;
+	if (sockaddr->ss_family == AF_INET6) {
+		sock_in6 = (struct sockaddr_in6 *)sockaddr;
+		snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str);
+		np->np_port = ntohs(sock_in6->sin6_port);
+	} else {
+		sock_in = (struct sockaddr_in *)sockaddr;
+		sprintf(np->np_ip, "%s", ip_str);
+		np->np_port = ntohs(sock_in->sin_port);
+	}
+
+	np->np_network_transport = network_transport;
+	spin_lock_init(&np->np_thread_lock);
+	init_completion(&np->np_restart_comp);
+	INIT_LIST_HEAD(&np->np_list);
+
+	ret = iscsi_target_setup_login_socket(np, sockaddr);
+	if (ret != 0) {
+		kfree(np);
+		return ERR_PTR(ret);
+	}
+
+	np->np_thread = kthread_run(iscsi_target_login_thread, np, "iscsi_np");
+	if (IS_ERR(np->np_thread)) {
+		pr_err("Unable to create kthread: iscsi_np\n");
+		ret = PTR_ERR(np->np_thread);
+		kfree(np);
+		return ERR_PTR(ret);
+	}
+	/*
+	 * Increment the np_exports reference count now to prevent
+	 * iscsit_del_np() below from being run while a new call to
+	 * iscsi_tpg_add_network_portal() for a matching iscsi_np is
+	 * active.  We don't need to hold np->np_thread_lock at this
+	 * point because iscsi_np has not been added to g_np_list yet.
+	 */
+	np->np_exports = 1;
+
+	spin_lock_bh(&np_lock);
+	list_add_tail(&np->np_list, &g_np_list);
+	spin_unlock_bh(&np_lock);
+
+	pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n",
+		np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ?
+		"TCP" : "SCTP");
+
+	return np;
+}
+
+int iscsit_reset_np_thread(
+	struct iscsi_np *np,
+	struct iscsi_tpg_np *tpg_np,
+	struct iscsi_portal_group *tpg)
+{
+	spin_lock_bh(&np->np_thread_lock);
+	if (tpg && tpg_np) {
+		/*
+		 * The reset operation need only be performed when the
+		 * passed struct iscsi_portal_group has a login in progress
+		 * to one of the network portals.
+		 */
+		if (tpg_np->tpg_np->np_login_tpg != tpg) {
+			spin_unlock_bh(&np->np_thread_lock);
+			return 0;
+		}
+	}
+	if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) {
+		spin_unlock_bh(&np->np_thread_lock);
+		return 0;
+	}
+	np->np_thread_state = ISCSI_NP_THREAD_RESET;
+
+	if (np->np_thread) {
+		spin_unlock_bh(&np->np_thread_lock);
+		send_sig(SIGINT, np->np_thread, 1);
+		wait_for_completion(&np->np_restart_comp);
+		spin_lock_bh(&np->np_thread_lock);
+	}
+	spin_unlock_bh(&np->np_thread_lock);
+
+	return 0;
+}
+
+int iscsit_del_np_comm(struct iscsi_np *np)
+{
+	if (!np->np_socket)
+		return 0;
+
+	/*
+	 * Some network transports allocate their own struct sock->file,
+	 * see if we need to free any additional allocated resources.
+	 */
+	if (np->np_flags & NPF_SCTP_STRUCT_FILE) {
+		kfree(np->np_socket->file);
+		np->np_socket->file = NULL;
+	}
+
+	sock_release(np->np_socket);
+	return 0;
+}
+
+int iscsit_del_np(struct iscsi_np *np)
+{
+	spin_lock_bh(&np->np_thread_lock);
+	np->np_exports--;
+	if (np->np_exports) {
+		spin_unlock_bh(&np->np_thread_lock);
+		return 0;
+	}
+	np->np_thread_state = ISCSI_NP_THREAD_SHUTDOWN;
+	spin_unlock_bh(&np->np_thread_lock);
+
+	if (np->np_thread) {
+		/*
+		 * We need to send the signal to wakeup Linux/Net
+		 * which may be sleeping in sock_accept()..
+		 */
+		send_sig(SIGINT, np->np_thread, 1);
+		kthread_stop(np->np_thread);
+	}
+	iscsit_del_np_comm(np);
+
+	spin_lock_bh(&np_lock);
+	list_del(&np->np_list);
+	spin_unlock_bh(&np_lock);
+
+	pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n",
+		np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ?
+		"TCP" : "SCTP");
+
+	kfree(np);
+	return 0;
+}
+
+static int __init iscsi_target_init_module(void)
+{
+	int ret = 0;
+
+	pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
+
+	iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL);
+	if (!iscsit_global) {
+		pr_err("Unable to allocate memory for iscsit_global\n");
+		return -1;
+	}
+	mutex_init(&auth_id_lock);
+	spin_lock_init(&sess_idr_lock);
+	idr_init(&tiqn_idr);
+	idr_init(&sess_idr);
+
+	ret = iscsi_target_register_configfs();
+	if (ret < 0)
+		goto out;
+
+	ret = iscsi_thread_set_init();
+	if (ret < 0)
+		goto configfs_out;
+
+	if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) !=
+			TARGET_THREAD_SET_COUNT) {
+		pr_err("iscsi_allocate_thread_sets() returned"
+			" unexpected value!\n");
+		goto ts_out1;
+	}
+
+	lio_cmd_cache = kmem_cache_create("lio_cmd_cache",
+			sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd),
+			0, NULL);
+	if (!lio_cmd_cache) {
+		pr_err("Unable to kmem_cache_create() for"
+				" lio_cmd_cache\n");
+		goto ts_out2;
+	}
+
+	lio_qr_cache = kmem_cache_create("lio_qr_cache",
+			sizeof(struct iscsi_queue_req),
+			__alignof__(struct iscsi_queue_req), 0, NULL);
+	if (!lio_qr_cache) {
+		pr_err("Unable to kmem_cache_create() for"
+				" lio_qr_cache\n");
+		goto cmd_out;
+	}
+
+	lio_dr_cache = kmem_cache_create("lio_dr_cache",
+			sizeof(struct iscsi_datain_req),
+			__alignof__(struct iscsi_datain_req), 0, NULL);
+	if (!lio_dr_cache) {
+		pr_err("Unable to kmem_cache_create() for"
+				" lio_dr_cache\n");
+		goto qr_out;
+	}
+
+	lio_ooo_cache = kmem_cache_create("lio_ooo_cache",
+			sizeof(struct iscsi_ooo_cmdsn),
+			__alignof__(struct iscsi_ooo_cmdsn), 0, NULL);
+	if (!lio_ooo_cache) {
+		pr_err("Unable to kmem_cache_create() for"
+				" lio_ooo_cache\n");
+		goto dr_out;
+	}
+
+	lio_r2t_cache = kmem_cache_create("lio_r2t_cache",
+			sizeof(struct iscsi_r2t), __alignof__(struct iscsi_r2t),
+			0, NULL);
+	if (!lio_r2t_cache) {
+		pr_err("Unable to kmem_cache_create() for"
+				" lio_r2t_cache\n");
+		goto ooo_out;
+	}
+
+	if (iscsit_load_discovery_tpg() < 0)
+		goto r2t_out;
+
+	return ret;
+r2t_out:
+	kmem_cache_destroy(lio_r2t_cache);
+ooo_out:
+	kmem_cache_destroy(lio_ooo_cache);
+dr_out:
+	kmem_cache_destroy(lio_dr_cache);
+qr_out:
+	kmem_cache_destroy(lio_qr_cache);
+cmd_out:
+	kmem_cache_destroy(lio_cmd_cache);
+ts_out2:
+	iscsi_deallocate_thread_sets();
+ts_out1:
+	iscsi_thread_set_free();
+configfs_out:
+	iscsi_target_deregister_configfs();
+out:
+	kfree(iscsit_global);
+	return -ENOMEM;
+}
+
+static void __exit iscsi_target_cleanup_module(void)
+{
+	iscsi_deallocate_thread_sets();
+	iscsi_thread_set_free();
+	iscsit_release_discovery_tpg();
+	kmem_cache_destroy(lio_cmd_cache);
+	kmem_cache_destroy(lio_qr_cache);
+	kmem_cache_destroy(lio_dr_cache);
+	kmem_cache_destroy(lio_ooo_cache);
+	kmem_cache_destroy(lio_r2t_cache);
+
+	iscsi_target_deregister_configfs();
+
+	kfree(iscsit_global);
+}
+
+int iscsit_add_reject(
+	u8 reason,
+	int fail_conn,
+	unsigned char *buf,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_cmd *cmd;
+	struct iscsi_reject *hdr;
+	int ret;
+
+	cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+	if (!cmd)
+		return -1;
+
+	cmd->iscsi_opcode = ISCSI_OP_REJECT;
+	if (fail_conn)
+		cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+
+	hdr	= (struct iscsi_reject *) cmd->pdu;
+	hdr->reason = reason;
+
+	cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+	if (!cmd->buf_ptr) {
+		pr_err("Unable to allocate memory for cmd->buf_ptr\n");
+		iscsit_release_cmd(cmd);
+		return -1;
+	}
+	memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN);
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	cmd->i_state = ISTATE_SEND_REJECT;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	ret = wait_for_completion_interruptible(&cmd->reject_comp);
+	if (ret != 0)
+		return -1;
+
+	return (!fail_conn) ? 0 : -1;
+}
+
+int iscsit_add_reject_from_cmd(
+	u8 reason,
+	int fail_conn,
+	int add_to_conn,
+	unsigned char *buf,
+	struct iscsi_cmd *cmd)
+{
+	struct iscsi_conn *conn;
+	struct iscsi_reject *hdr;
+	int ret;
+
+	if (!cmd->conn) {
+		pr_err("cmd->conn is NULL for ITT: 0x%08x\n",
+				cmd->init_task_tag);
+		return -1;
+	}
+	conn = cmd->conn;
+
+	cmd->iscsi_opcode = ISCSI_OP_REJECT;
+	if (fail_conn)
+		cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+
+	hdr	= (struct iscsi_reject *) cmd->pdu;
+	hdr->reason = reason;
+
+	cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+	if (!cmd->buf_ptr) {
+		pr_err("Unable to allocate memory for cmd->buf_ptr\n");
+		iscsit_release_cmd(cmd);
+		return -1;
+	}
+	memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN);
+
+	if (add_to_conn) {
+		spin_lock_bh(&conn->cmd_lock);
+		list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+		spin_unlock_bh(&conn->cmd_lock);
+	}
+
+	cmd->i_state = ISTATE_SEND_REJECT;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	ret = wait_for_completion_interruptible(&cmd->reject_comp);
+	if (ret != 0)
+		return -1;
+
+	return (!fail_conn) ? 0 : -1;
+}
+
+/*
+ * Map some portion of the allocated scatterlist to an iovec, suitable for
+ * kernel sockets to copy data in/out. This handles both pages and slab-allocated
+ * buffers, since we have been tricky and mapped t_mem_sg to the buffer in
+ * either case (see iscsit_alloc_buffs)
+ */
+static int iscsit_map_iovec(
+	struct iscsi_cmd *cmd,
+	struct kvec *iov,
+	u32 data_offset,
+	u32 data_length)
+{
+	u32 i = 0;
+	struct scatterlist *sg;
+	unsigned int page_off;
+
+	/*
+	 * We have a private mapping of the allocated pages in t_mem_sg.
+	 * At this point, we also know each contains a page.
+	 */
+	sg = &cmd->t_mem_sg[data_offset / PAGE_SIZE];
+	page_off = (data_offset % PAGE_SIZE);
+
+	cmd->first_data_sg = sg;
+	cmd->first_data_sg_off = page_off;
+
+	while (data_length) {
+		u32 cur_len = min_t(u32, data_length, sg->length - page_off);
+
+		iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off;
+		iov[i].iov_len = cur_len;
+
+		data_length -= cur_len;
+		page_off = 0;
+		sg = sg_next(sg);
+		i++;
+	}
+
+	cmd->kmapped_nents = i;
+
+	return i;
+}
+
+static void iscsit_unmap_iovec(struct iscsi_cmd *cmd)
+{
+	u32 i;
+	struct scatterlist *sg;
+
+	sg = cmd->first_data_sg;
+
+	for (i = 0; i < cmd->kmapped_nents; i++)
+		kunmap(sg_page(&sg[i]));
+}
+
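+/*
+ * The initiator's ExpStatSN acknowledges every status PDU carrying a
+ * lower StatSN, so any command parked in ISTATE_SENT_STATUS with a
+ * stat_sn below it can now be retired.
+ */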
+static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn)
+{
+	struct iscsi_cmd *cmd;
+
+	conn->exp_statsn = exp_statsn;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+		spin_lock(&cmd->istate_lock);
+		if ((cmd->i_state == ISTATE_SENT_STATUS) &&
+		    (cmd->stat_sn < exp_statsn)) {
+			cmd->i_state = ISTATE_REMOVE;
+			spin_unlock(&cmd->istate_lock);
+			iscsit_add_cmd_to_immediate_queue(cmd, conn,
+						cmd->i_state);
+			continue;
+		}
+		spin_unlock(&cmd->istate_lock);
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+}
+
+static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
+{
+	u32 iov_count = (cmd->se_cmd.t_data_nents == 0) ? 1 :
+				cmd->se_cmd.t_data_nents;
+
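+	/* Reserve extra kvec slots beyond the data scatterlist entries
+	 * for pad bytes and digest values.
+	 */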
+	iov_count += TRANSPORT_IOV_DATA_BUFFER;
+
+	cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL);
+	if (!cmd->iov_data) {
+		pr_err("Unable to allocate cmd->iov_data\n");
+		return -ENOMEM;
+	}
+
+	cmd->orig_iov_data_count = iov_count;
+	return 0;
+}
+
+static int iscsit_alloc_buffs(struct iscsi_cmd *cmd)
+{
+	struct scatterlist *sgl;
+	u32 length = cmd->se_cmd.data_length;
+	int nents = DIV_ROUND_UP(length, PAGE_SIZE);
+	int i = 0, ret;
+	/*
+	 * If no SCSI payload is present, allocate the default iovecs used for
+	 * iSCSI PDU Header
+	 */
+	if (!length)
+		return iscsit_allocate_iovecs(cmd);
+
+	sgl = kzalloc(sizeof(*sgl) * nents, GFP_KERNEL);
+	if (!sgl)
+		return -ENOMEM;
+
+	sg_init_table(sgl, nents);
+
+	while (length) {
+		int buf_size = min_t(int, length, PAGE_SIZE);
+		struct page *page;
+
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page)
+			goto page_alloc_failed;
+
+		sg_set_page(&sgl[i], page, buf_size, 0);
+
+		length -= buf_size;
+		i++;
+	}
+
+	cmd->t_mem_sg = sgl;
+	cmd->t_mem_sg_nents = nents;
+
+	/* BIDI ops not supported */
+
+	/* Tell the core about our preallocated memory */
+	transport_generic_map_mem_to_cmd(&cmd->se_cmd, sgl, nents, NULL, 0);
+	/*
+	 * Allocate iovecs for SCSI payload after transport_generic_map_mem_to_cmd
+	 * so that cmd->se_cmd.t_tasks_se_num has been set.
+	 */
+	ret = iscsit_allocate_iovecs(cmd);
+	if (ret < 0)
+		goto page_alloc_failed;
+
+	return 0;
+
+page_alloc_failed:
+	/* Only entries below index i actually received a page. */
+	while (i > 0)
+		__free_page(sg_page(&sgl[--i]));
+	kfree(cmd->t_mem_sg);
+	cmd->t_mem_sg = NULL;
+	return -ENOMEM;
+}
+
+static int iscsit_handle_scsi_cmd(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	int	data_direction, cmdsn_ret = 0, immed_ret, ret, transport_ret;
+	int	dump_immediate_data = 0, send_check_condition = 0, payload_length;
+	struct iscsi_cmd	*cmd = NULL;
+	struct iscsi_scsi_req *hdr;
+
+	spin_lock_bh(&conn->sess->session_stats_lock);
+	conn->sess->cmd_pdus++;
+	if (conn->sess->se_sess->se_node_acl) {
+		spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+		conn->sess->se_sess->se_node_acl->num_cmds++;
+		spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+	}
+	spin_unlock_bh(&conn->sess->session_stats_lock);
+
+	hdr			= (struct iscsi_scsi_req *) buf;
+	payload_length		= ntoh24(hdr->dlength);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->data_length	= be32_to_cpu(hdr->data_length);
+	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
+	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
+
+	/* FIXME: Add checks for AdditionalHeaderSegment */
+
+	if (!(hdr->flags & ISCSI_FLAG_CMD_WRITE) &&
+	    !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) {
+		pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL"
+				" not set. Bad iSCSI Initiator.\n");
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+				buf, conn);
+	}
+
+	if (((hdr->flags & ISCSI_FLAG_CMD_READ) ||
+	     (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) {
+		/*
+		 * VMware ESX v3.0 uses a modified Cisco Initiator (v3.4.2)
+		 * that adds support for RESERVE/RELEASE.  A bug in this new
+		 * functionality sets the R/W bits even though neither CDB
+		 * carries any READ or WRITE data payloads.
+		 */
+		if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) {
+			hdr->flags &= ~ISCSI_FLAG_CMD_READ;
+			hdr->flags &= ~ISCSI_FLAG_CMD_WRITE;
+			goto done;
+		}
+
+		pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE"
+			" set when Expected Data Transfer Length is 0 for"
+			" CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]);
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+				buf, conn);
+	}
+done:
+
+	if (!(hdr->flags & ISCSI_FLAG_CMD_READ) &&
+	    !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) {
+		pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE"
+			" MUST be set if Expected Data Transfer Length is not 0."
+			" Bad iSCSI Initiator\n");
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+				buf, conn);
+	}
+
+	if ((hdr->flags & ISCSI_FLAG_CMD_READ) &&
+	    (hdr->flags & ISCSI_FLAG_CMD_WRITE)) {
+		pr_err("Bidirectional operations not supported!\n");
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+				buf, conn);
+	}
+
+	if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+		pr_err("Illegally set Immediate Bit in iSCSI Initiator"
+				" Scsi Command PDU.\n");
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+				buf, conn);
+	}
+
+	if (payload_length && !conn->sess->sess_ops->ImmediateData) {
+		pr_err("ImmediateData=No but DataSegmentLength=%u,"
+			" protocol error.\n", payload_length);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+				buf, conn);
+	}
+
+	if ((hdr->data_length == payload_length) &&
+	    (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) {
+		pr_err("Expected Data Transfer Length and Length of"
+			" Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL"
+			" bit is not set, protocol error.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+				buf, conn);
+	}
+
+	if (payload_length > hdr->data_length) {
+		pr_err("DataSegmentLength: %u is greater than"
+			" EDTL: %u, protocol error.\n", payload_length,
+				hdr->data_length);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+				buf, conn);
+	}
+
+	if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+		pr_err("DataSegmentLength: %u is greater than"
+			" MaxRecvDataSegmentLength: %u, protocol error.\n",
+			payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+				buf, conn);
+	}
+
+	if (payload_length > conn->sess->sess_ops->FirstBurstLength) {
+		pr_err("DataSegmentLength: %u is greater than"
+			" FirstBurstLength: %u, protocol error.\n",
+			payload_length, conn->sess->sess_ops->FirstBurstLength);
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+					buf, conn);
+	}
+
+	data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE :
+			 (hdr->flags & ISCSI_FLAG_CMD_READ) ? DMA_FROM_DEVICE :
+			  DMA_NONE;
+
+	cmd = iscsit_allocate_se_cmd(conn, hdr->data_length, data_direction,
+				(hdr->flags & ISCSI_FLAG_CMD_ATTR_MASK));
+	if (!cmd)
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1,
+					buf, conn);
+
+	pr_debug("Got SCSI Command, ITT: 0x%08x, CmdSN: 0x%08x,"
+		" ExpXferLen: %u, Length: %u, CID: %hu\n", hdr->itt,
+		hdr->cmdsn, hdr->data_length, payload_length, conn->cid);
+
+	cmd->iscsi_opcode	= ISCSI_OP_SCSI_CMD;
+	cmd->i_state		= ISTATE_NEW_CMD;
+	cmd->immediate_cmd	= ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+	cmd->immediate_data	= (payload_length) ? 1 : 0;
+	cmd->unsolicited_data	= ((!(hdr->flags & ISCSI_FLAG_CMD_FINAL) &&
+				     (hdr->flags & ISCSI_FLAG_CMD_WRITE)) ? 1 : 0);
+	if (cmd->unsolicited_data)
+		cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA;
+
+	conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+		spin_lock_bh(&conn->sess->ttt_lock);
+		cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+		if (cmd->targ_xfer_tag == 0xFFFFFFFF)
+			cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+		spin_unlock_bh(&conn->sess->ttt_lock);
+	} else if (hdr->flags & ISCSI_FLAG_CMD_WRITE)
+		cmd->targ_xfer_tag = 0xFFFFFFFF;
+	cmd->cmd_sn		= hdr->cmdsn;
+	cmd->exp_stat_sn	= hdr->exp_statsn;
+	cmd->first_burst_len	= payload_length;
+
+	if (cmd->data_direction == DMA_FROM_DEVICE) {
+		struct iscsi_datain_req *dr;
+
+		dr = iscsit_allocate_datain_req();
+		if (!dr)
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+					1, 1, buf, cmd);
+
+		iscsit_attach_datain_req(cmd, dr);
+	}
+
+	/*
+	 * The CDB is going to an se_device_t.
+	 */
+	ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb,
+				get_unaligned_le64(&hdr->lun));
+	if (ret < 0) {
+		if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) {
+			pr_debug("Responding to non-acl'ed,"
+				" non-existent or non-exported iSCSI LUN:"
+				" 0x%016Lx\n", get_unaligned_le64(&hdr->lun));
+		}
+		if (ret == PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES)
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+					1, 1, buf, cmd);
+
+		send_check_condition = 1;
+		goto attach_cmd;
+	}
+	/*
+	 * The Initiator Node has access to the LUN (the addressing method
+	 * is handled inside of iscsit_get_lun_for_cmd()).  Now it's time to
+	 * allocate 1->N transport tasks (depending on sector count and
+	 * maximum request size the physical HBA(s) can handle).
+	 */
+	transport_ret = transport_generic_allocate_tasks(&cmd->se_cmd, hdr->cdb);
+	if (transport_ret == -ENOMEM) {
+		return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+				1, 1, buf, cmd);
+	} else if (transport_ret == -EINVAL) {
+		/*
+		 * Unsupported SAM Opcode.  CHECK_CONDITION will be sent
+		 * in iscsit_execute_cmd() during the CmdSN OOO Execution
+		 * Mechanism.
+		 */
+		send_check_condition = 1;
+	} else {
+		if (iscsit_decide_list_to_build(cmd, payload_length) < 0)
+			return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+				1, 1, buf, cmd);
+	}
+
+attach_cmd:
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+	/*
+	 * Check if we need to delay processing because of ALUA
+	 * Active/NonOptimized primary access state..
+	 */
+	core_alua_check_nonop_delay(&cmd->se_cmd);
+	/*
+	 * Allocate and setup SGL used with transport_generic_map_mem_to_cmd().
+	 * also call iscsit_allocate_iovecs()
+	 */
+	ret = iscsit_alloc_buffs(cmd);
+	if (ret < 0)
+		return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+				1, 1, buf, cmd);
+	/*
+	 * Check the CmdSN against ExpCmdSN/MaxCmdSN here if
+	 * the Immediate Bit is not set, and no Immediate
+	 * Data is attached.
+	 *
+	 * A PDU/CmdSN carrying Immediate Data can only
+	 * be processed after the DataCRC has passed.
+	 * If the DataCRC fails, the CmdSN MUST NOT
+	 * be acknowledged. (See below)
+	 */
+	if (!cmd->immediate_data) {
+		cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+			return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_PROTOCOL_ERROR,
+				1, 0, buf, cmd);
+	}
+
+	iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+	/*
+	 * If no Immediate Data is attached, it's OK to return now.
+	 */
+	if (!cmd->immediate_data) {
+		if (send_check_condition)
+			return 0;
+
+		if (cmd->unsolicited_data) {
+			iscsit_set_dataout_sequence_values(cmd);
+
+			spin_lock_bh(&cmd->dataout_timeout_lock);
+			iscsit_start_dataout_timer(cmd, cmd->conn);
+			spin_unlock_bh(&cmd->dataout_timeout_lock);
+		}
+
+		return 0;
+	}
+
+	/*
+	 * Early CHECK_CONDITIONs never make it to the transport processing
+	 * thread.  They are processed in CmdSN order by
+	 * iscsit_check_received_cmdsn() below.
+	 */
+	if (send_check_condition) {
+		immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION;
+		dump_immediate_data = 1;
+		goto after_immediate_data;
+	}
+	/*
+	 * Call directly into transport_generic_new_cmd() to perform
+	 * the backend memory allocation.
+	 */
+	ret = transport_generic_new_cmd(&cmd->se_cmd);
+	if ((ret < 0) || (cmd->se_cmd.se_cmd_flags & SCF_SE_CMD_FAILED)) {
+		immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION;
+		dump_immediate_data = 1;
+		goto after_immediate_data;
+	}
+
+	immed_ret = iscsit_handle_immediate_data(cmd, buf, payload_length);
+after_immediate_data:
+	if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) {
+		/*
+		 * A PDU/CmdSN carrying Immediate Data passed
+		 * DataCRC, check against ExpCmdSN/MaxCmdSN if
+		 * Immediate Bit is not set.
+		 */
+		cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		/*
+		 * Special case for Unsupported SAM WRITE Opcodes
+		 * and ImmediateData=Yes.
+		 */
+		if (dump_immediate_data) {
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return -1;
+		} else if (cmd->unsolicited_data) {
+			iscsit_set_dataout_sequence_values(cmd);
+
+			spin_lock_bh(&cmd->dataout_timeout_lock);
+			iscsit_start_dataout_timer(cmd, cmd->conn);
+			spin_unlock_bh(&cmd->dataout_timeout_lock);
+		}
+
+		if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+			return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_PROTOCOL_ERROR,
+				1, 0, buf, cmd);
+
+	} else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) {
+		/*
+		 * Immediate Data failed DataCRC and ERL>=1,
+		 * silently drop this PDU and let the initiator
+		 * plug the CmdSN gap.
+		 *
+		 * FIXME: Send Unsolicited NOPIN with reserved
+		 * TTT here to help the initiator figure out
+		 * the missing CmdSN, although they should be
+		 * intelligent enough to determine the missing
+		 * CmdSN and issue a retry to plug the sequence.
+		 */
+		cmd->i_state = ISTATE_REMOVE;
+		iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+	} else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */
+		return -1;
+
+	return 0;
+}
+
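+/*
+ * Compute the CRC32C DataDigest over a span of the command's data
+ * scatterlist, folding in any 4-byte-alignment pad bytes at the end.
+ */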
+static u32 iscsit_do_crypto_hash_sg(
+	struct hash_desc *hash,
+	struct iscsi_cmd *cmd,
+	u32 data_offset,
+	u32 data_length,
+	u32 padding,
+	u8 *pad_bytes)
+{
+	u32 data_crc;
+	u32 i;
+	struct scatterlist *sg;
+	unsigned int page_off;
+
+	crypto_hash_init(hash);
+
+	sg = cmd->first_data_sg;
+	page_off = cmd->first_data_sg_off;
+
+	i = 0;
+	while (data_length) {
+		u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off));
+
+		crypto_hash_update(hash, &sg[i], cur_len);
+
+		data_length -= cur_len;
+		page_off = 0;
+		i++;
+	}
+
+	if (padding) {
+		struct scatterlist pad_sg;
+
+		sg_init_one(&pad_sg, pad_bytes, padding);
+		crypto_hash_update(hash, &pad_sg, padding);
+	}
+	crypto_hash_final(hash, (u8 *) &data_crc);
+
+	return data_crc;
+}
+
+static void iscsit_do_crypto_hash_buf(
+	struct hash_desc *hash,
+	unsigned char *buf,
+	u32 payload_length,
+	u32 padding,
+	u8 *pad_bytes,
+	u8 *data_crc)
+{
+	struct scatterlist sg;
+
+	crypto_hash_init(hash);
+
+	sg_init_one(&sg, (u8 *)buf, payload_length);
+	crypto_hash_update(hash, &sg, payload_length);
+
+	if (padding) {
+		sg_init_one(&sg, pad_bytes, padding);
+		crypto_hash_update(hash, &sg, padding);
+	}
+	crypto_hash_final(hash, data_crc);
+}
+
+static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf)
+{
+	int iov_ret, ooo_cmdsn = 0, ret;
+	u8 data_crc_failed = 0;
+	u32 checksum, iov_count = 0, padding = 0, rx_got = 0;
+	u32 rx_size = 0, payload_length;
+	struct iscsi_cmd *cmd = NULL;
+	struct se_cmd *se_cmd;
+	struct iscsi_data *hdr;
+	struct kvec *iov;
+	unsigned long flags;
+
+	hdr			= (struct iscsi_data *) buf;
+	payload_length		= ntoh24(hdr->dlength);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->ttt		= be32_to_cpu(hdr->ttt);
+	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
+	hdr->datasn		= be32_to_cpu(hdr->datasn);
+	hdr->offset		= be32_to_cpu(hdr->offset);
+
+	if (!payload_length) {
+		pr_err("DataOUT payload is ZERO, protocol error.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	/* iSCSI write */
+	spin_lock_bh(&conn->sess->session_stats_lock);
+	conn->sess->rx_data_octets += payload_length;
+	if (conn->sess->se_sess->se_node_acl) {
+		spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+		conn->sess->se_sess->se_node_acl->write_bytes += payload_length;
+		spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+	}
+	spin_unlock_bh(&conn->sess->session_stats_lock);
+
+	if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+		pr_err("DataSegmentLength: %u is greater than"
+			" MaxRecvDataSegmentLength: %u\n", payload_length,
+			conn->conn_ops->MaxRecvDataSegmentLength);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt,
+			payload_length);
+	if (!cmd)
+		return 0;
+
+	pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x,"
+		" DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n",
+		hdr->itt, hdr->ttt, hdr->datasn, hdr->offset,
+		payload_length, conn->cid);
+
+	if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+		pr_err("Command ITT: 0x%08x received DataOUT after"
+			" last DataOUT received, dumping payload\n",
+			cmd->init_task_tag);
+		return iscsit_dump_data_payload(conn, payload_length, 1);
+	}
+
+	if (cmd->data_direction != DMA_TO_DEVICE) {
+		pr_err("Command ITT: 0x%08x received DataOUT for a"
+			" NON-WRITE command.\n", cmd->init_task_tag);
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR,
+				1, 0, buf, cmd);
+	}
+	se_cmd = &cmd->se_cmd;
+	iscsit_mod_dataout_timer(cmd);
+
+	if ((hdr->offset + payload_length) > cmd->data_length) {
+		pr_err("DataOut Offset: %u, Length %u greater than"
+			" iSCSI Command EDTL %u, protocol error.\n",
+			hdr->offset, payload_length, cmd->data_length);
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+				1, 0, buf, cmd);
+	}
+
+	if (cmd->unsolicited_data) {
+		int dump_unsolicited_data = 0;
+
+		if (conn->sess->sess_ops->InitialR2T) {
+			pr_err("Received unexpected unsolicited data"
+				" while InitialR2T=Yes, protocol error.\n");
+			transport_send_check_condition_and_sense(&cmd->se_cmd,
+					TCM_UNEXPECTED_UNSOLICITED_DATA, 0);
+			return -1;
+		}
+		/*
+		 * Special case for dealing with Unsolicited DataOUT
+		 * and Unsupported SAM WRITE Opcodes and SE resource allocation
+		 * failures;
+		 */
+
+		/* Something's amiss if we're not in WRITE_PENDING state... */
+		spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+		WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING);
+		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+		spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+		if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) ||
+		     (se_cmd->se_cmd_flags & SCF_SE_CMD_FAILED))
+			dump_unsolicited_data = 1;
+		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+		if (dump_unsolicited_data) {
+			/*
+			 * Check if a delayed TASK_ABORTED status needs to
+			 * be sent now if the ISCSI_FLAG_CMD_FINAL has been
+			 * received with the unsolicited data out.
+			 */
+			if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
+				iscsit_stop_dataout_timer(cmd);
+
+			transport_check_aborted_status(se_cmd,
+					(hdr->flags & ISCSI_FLAG_CMD_FINAL));
+			return iscsit_dump_data_payload(conn, payload_length, 1);
+		}
+	} else {
+		/*
+		 * For the normal solicited data path:
+		 *
+		 * Check for a delayed TASK_ABORTED status and dump any
+		 * incoming data out payload if one exists.  Also, when the
+		 * ISCSI_FLAG_CMD_FINAL is set to denote the end of the current
+		 * data out sequence, we decrement outstanding_r2ts.  Once
+		 * outstanding_r2ts reaches zero, go ahead and send the delayed
+		 * TASK_ABORTED status.
+		 */
+		if (atomic_read(&se_cmd->t_transport_aborted) != 0) {
+			if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
+				if (--cmd->outstanding_r2ts < 1) {
+					iscsit_stop_dataout_timer(cmd);
+					transport_check_aborted_status(
+							se_cmd, 1);
+				}
+
+			return iscsit_dump_data_payload(conn, payload_length, 1);
+		}
+	}
+	/*
+	 * Perform DataSN, DataSequenceInOrder, DataPDUInOrder, and
+	 * within-command recovery checks before receiving the payload.
+	 */
+	ret = iscsit_check_pre_dataout(cmd, buf);
+	if (ret == DATAOUT_WITHIN_COMMAND_RECOVERY)
+		return 0;
+	else if (ret == DATAOUT_CANNOT_RECOVER)
+		return -1;
+
+	rx_size += payload_length;
+	iov = &cmd->iov_data[0];
+
+	iov_ret = iscsit_map_iovec(cmd, iov, hdr->offset, payload_length);
+	if (iov_ret < 0)
+		return -1;
+
+	iov_count += iov_ret;
+
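+	/* iSCSI data segments are padded to a 4-byte boundary, so e.g.
+	 * a 5 byte payload is followed by 3 pad bytes; (-len) & 3
+	 * yields that count.
+	 */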
+	padding = ((-payload_length) & 3);
+	if (padding != 0) {
+		iov[iov_count].iov_base	= cmd->pad_bytes;
+		iov[iov_count++].iov_len = padding;
+		rx_size += padding;
+		pr_debug("Receiving %u padding bytes.\n", padding);
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		iov[iov_count].iov_base = &checksum;
+		iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+		rx_size += ISCSI_CRC_LEN;
+	}
+
+	rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
+	iscsit_unmap_iovec(cmd);
+
+	if (rx_got != rx_size)
+		return -1;
+
+	if (conn->conn_ops->DataDigest) {
+		u32 data_crc;
+
+		data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+						    hdr->offset, payload_length, padding,
+						    cmd->pad_bytes);
+
+		if (checksum != data_crc) {
+			pr_err("ITT: 0x%08x, Offset: %u, Length: %u,"
+				" DataSN: 0x%08x, CRC32C DataDigest 0x%08x"
+				" does not match computed 0x%08x\n",
+				hdr->itt, hdr->offset, payload_length,
+				hdr->datasn, checksum, data_crc);
+			data_crc_failed = 1;
+		} else {
+			pr_debug("Got CRC32C DataDigest 0x%08x for"
+				" %u bytes of Data Out\n", checksum,
+				payload_length);
+		}
+	}
+	/*
+	 * Increment post receive data and CRC values or perform
+	 * within-command recovery.
+	 */
+	ret = iscsit_check_post_dataout(cmd, buf, data_crc_failed);
+	if ((ret == DATAOUT_NORMAL) || (ret == DATAOUT_WITHIN_COMMAND_RECOVERY))
+		return 0;
+	else if (ret == DATAOUT_SEND_R2T) {
+		iscsit_set_dataout_sequence_values(cmd);
+		iscsit_build_r2ts_for_cmd(cmd, conn, 0);
+	} else if (ret == DATAOUT_SEND_TO_TRANSPORT) {
+		/*
+		 * Handle extra special case for out of order
+		 * Unsolicited Data Out.
+		 */
+		spin_lock_bh(&cmd->istate_lock);
+		ooo_cmdsn = (cmd->cmd_flags & ICF_OOO_CMDSN);
+		cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+		cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+		spin_unlock_bh(&cmd->istate_lock);
+
+		iscsit_stop_dataout_timer(cmd);
+		return (!ooo_cmdsn) ? transport_generic_handle_data(
+					&cmd->se_cmd) : 0;
+	} else /* DATAOUT_CANNOT_RECOVER */
+		return -1;
+
+	return 0;
+}
+
+static int iscsit_handle_nop_out(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	unsigned char *ping_data = NULL;
+	int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size;
+	u32 checksum, data_crc, padding = 0, payload_length;
+	u64 lun;
+	struct iscsi_cmd *cmd = NULL;
+	struct kvec *iov = NULL;
+	struct iscsi_nopout *hdr;
+
+	hdr			= (struct iscsi_nopout *) buf;
+	payload_length		= ntoh24(hdr->dlength);
+	lun			= get_unaligned_le64(&hdr->lun);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->ttt		= be32_to_cpu(hdr->ttt);
+	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
+	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
+
+	if ((hdr->itt == 0xFFFFFFFF) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+		pr_err("NOPOUT ITT is reserved, but Immediate Bit is"
+			" not set, protocol error.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+		pr_err("NOPOUT Ping Data DataSegmentLength: %u is"
+			" greater than MaxRecvDataSegmentLength: %u, protocol"
+			" error.\n", payload_length,
+			conn->conn_ops->MaxRecvDataSegmentLength);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%08x,"
+		" CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+		(hdr->itt == 0xFFFFFFFF) ? "Response" : "Request",
+		hdr->itt, hdr->ttt, hdr->cmdsn, hdr->exp_statsn,
+		payload_length);
+	/*
+	 * This is not a response to an Unsolicited NopIN, which means
+	 * it can either be a NOPOUT ping request (with a valid ITT),
+	 * or a NOPOUT not requesting a NOPIN (with a reserved ITT).
+	 * Either way, make sure we allocate a struct iscsi_cmd, as both
+	 * can contain ping data.
+	 */
+	if (hdr->ttt == 0xFFFFFFFF) {
+		cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+		if (!cmd)
+			return iscsit_add_reject(
+					ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+					1, buf, conn);
+
+		cmd->iscsi_opcode	= ISCSI_OP_NOOP_OUT;
+		cmd->i_state		= ISTATE_SEND_NOPIN;
+		cmd->immediate_cmd	= ((hdr->opcode & ISCSI_OP_IMMEDIATE) ?
+						1 : 0);
+		conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+		cmd->targ_xfer_tag	= 0xFFFFFFFF;
+		cmd->cmd_sn		= hdr->cmdsn;
+		cmd->exp_stat_sn	= hdr->exp_statsn;
+		cmd->data_direction	= DMA_NONE;
+	}
+
+	if (payload_length && (hdr->ttt == 0xFFFFFFFF)) {
+		rx_size = payload_length;
+		ping_data = kzalloc(payload_length + 1, GFP_KERNEL);
+		if (!ping_data) {
+			pr_err("Unable to allocate memory for"
+				" NOPOUT ping data.\n");
+			ret = -1;
+			goto out;
+		}
+
+		iov = &cmd->iov_misc[0];
+		iov[niov].iov_base	= ping_data;
+		iov[niov++].iov_len	= payload_length;
+
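+		/*
+		 * (-len) & 3 yields the 0..3 extra bytes needed to pad the
+		 * data segment to the next 4-byte boundary, per RFC 3720.
+		 */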
+		padding = ((-payload_length) & 3);
+		if (padding != 0) {
+			pr_debug("Receiving %u additional bytes"
+				" for padding.\n", padding);
+			iov[niov].iov_base	= &cmd->pad_bytes;
+			iov[niov++].iov_len	= padding;
+			rx_size += padding;
+		}
+		if (conn->conn_ops->DataDigest) {
+			iov[niov].iov_base	= &checksum;
+			iov[niov++].iov_len	= ISCSI_CRC_LEN;
+			rx_size += ISCSI_CRC_LEN;
+		}
+
+		rx_got = rx_data(conn, &cmd->iov_misc[0], niov, rx_size);
+		if (rx_got != rx_size) {
+			ret = -1;
+			goto out;
+		}
+
+		if (conn->conn_ops->DataDigest) {
+			iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+					ping_data, payload_length,
+					padding, cmd->pad_bytes,
+					(u8 *)&data_crc);
+
+			if (checksum != data_crc) {
+				pr_err("Ping data CRC32C DataDigest"
+				" 0x%08x does not match computed 0x%08x\n",
+					checksum, data_crc);
+				if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+					pr_err("Unable to recover from"
+					" NOPOUT Ping DataCRC failure while in"
+						" ERL=0.\n");
+					ret = -1;
+					goto out;
+				} else {
+					/*
+					 * Silently drop this PDU and let the
+					 * initiator plug the CmdSN gap.
+					 */
+					pr_debug("Dropping NOPOUT"
+					" Command CmdSN: 0x%08x due to"
+					" DataCRC error.\n", hdr->cmdsn);
+					ret = 0;
+					goto out;
+				}
+			} else {
+				pr_debug("Got CRC32C DataDigest"
+				" 0x%08x for %u bytes of ping data.\n",
+					checksum, payload_length);
+			}
+		}
+
+		ping_data[payload_length] = '\0';
+		/*
+		 * Attach ping data to struct iscsi_cmd->buf_ptr.
+		 */
+		cmd->buf_ptr = (void *)ping_data;
+		cmd->buf_ptr_size = payload_length;
+
+		pr_debug("Got %u bytes of NOPOUT ping"
+			" data.\n", payload_length);
+		pr_debug("Ping Data: \"%s\"\n", ping_data);
+	}
+
+	if (hdr->itt != 0xFFFFFFFF) {
+		if (!cmd) {
+			pr_err("Checking CmdSN for NOPOUT,"
+				" but cmd is NULL!\n");
+			return -1;
+		}
+		/*
+		 * Initiator is expecting a NopIN ping reply.
+		 */
+		spin_lock_bh(&conn->cmd_lock);
+		list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+		spin_unlock_bh(&conn->cmd_lock);
+
+		iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+		if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+			iscsit_add_cmd_to_response_queue(cmd, conn,
+					cmd->i_state);
+			return 0;
+		}
+
+		cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+			ret = 0;
+			goto ping_out;
+		}
+		if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_PROTOCOL_ERROR,
+					1, 0, buf, cmd);
+
+		return 0;
+	}
+
+	if (hdr->ttt != 0xFFFFFFFF) {
+		/*
+		 * This was a response to a unsolicited NOPIN ping.
+		 */
+		cmd = iscsit_find_cmd_from_ttt(conn, hdr->ttt);
+		if (!cmd)
+			return -1;
+
+		iscsit_stop_nopin_response_timer(conn);
+
+		cmd->i_state = ISTATE_REMOVE;
+		iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+		iscsit_start_nopin_timer(conn);
+	} else {
+		/*
+		 * Initiator is not expecting a NOPIN in response.
+		 * Just ignore for now.
+		 *
+		 * iSCSI v19-91 10.18
+		 * "A NOP-OUT may also be used to confirm a changed
+		 *  ExpStatSN if another PDU will not be available
+		 *  for a long time."
+		 */
+		ret = 0;
+		goto out;
+	}
+
+	return 0;
+out:
+	if (cmd)
+		iscsit_release_cmd(cmd);
+ping_out:
+	kfree(ping_data);
+	return ret;
+}
+
+static int iscsit_handle_task_mgt_cmd(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	struct iscsi_cmd *cmd;
+	struct se_tmr_req *se_tmr;
+	struct iscsi_tmr_req *tmr_req;
+	struct iscsi_tm *hdr;
+	u32 payload_length;
+	int out_of_order_cmdsn = 0;
+	int ret;
+	u8 function;
+
+	hdr			= (struct iscsi_tm *) buf;
+	payload_length		= ntoh24(hdr->dlength);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->rtt		= be32_to_cpu(hdr->rtt);
+	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
+	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
+	hdr->refcmdsn		= be32_to_cpu(hdr->refcmdsn);
+	hdr->exp_datasn		= be32_to_cpu(hdr->exp_datasn);
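+	/*
+	 * The low seven flag bits carry the TMR function code, so strip
+	 * the Final bit before extracting it.
+	 */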
+	hdr->flags &= ~ISCSI_FLAG_CMD_FINAL;
+	function = hdr->flags;
+
+	pr_debug("Got Task Management Request ITT: 0x%08x, CmdSN:"
+		" 0x%08x, Function: 0x%02x, RefTaskTag: 0x%08x, RefCmdSN:"
+		" 0x%08x, CID: %hu\n", hdr->itt, hdr->cmdsn, function,
+		hdr->rtt, hdr->refcmdsn, conn->cid);
+
+	if ((function != ISCSI_TM_FUNC_ABORT_TASK) &&
+	    ((function != ISCSI_TM_FUNC_TASK_REASSIGN) &&
+	     (hdr->rtt != ISCSI_RESERVED_TAG))) {
+		pr_err("RefTaskTag should be set to 0xFFFFFFFF.\n");
+		hdr->rtt = ISCSI_RESERVED_TAG;
+	}
+
+	if ((function == ISCSI_TM_FUNC_TASK_REASSIGN) &&
+			!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+		pr_err("Task Management Request TASK_REASSIGN not"
+			" issued as immediate command, bad iSCSI Initiator"
+				"implementation\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+	if ((function != ISCSI_TM_FUNC_ABORT_TASK) &&
+	    (hdr->refcmdsn != ISCSI_RESERVED_TAG))
+		hdr->refcmdsn = ISCSI_RESERVED_TAG;
+
+	cmd = iscsit_allocate_se_cmd_for_tmr(conn, function);
+	if (!cmd)
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+					1, buf, conn);
+
+	cmd->iscsi_opcode	= ISCSI_OP_SCSI_TMFUNC;
+	cmd->i_state		= ISTATE_SEND_TASKMGTRSP;
+	cmd->immediate_cmd	= ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+	cmd->init_task_tag	= hdr->itt;
+	cmd->targ_xfer_tag	= 0xFFFFFFFF;
+	cmd->cmd_sn		= hdr->cmdsn;
+	cmd->exp_stat_sn	= hdr->exp_statsn;
+	se_tmr			= cmd->se_cmd.se_tmr_req;
+	tmr_req			= cmd->tmr_req;
+	/*
+	 * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN
+	 */
+	if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
+		ret = iscsit_get_lun_for_tmr(cmd,
+				get_unaligned_le64(&hdr->lun));
+		if (ret < 0) {
+			cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+			se_tmr->response = ISCSI_TMF_RSP_NO_LUN;
+			goto attach;
+		}
+	}
+
+	switch (function) {
+	case ISCSI_TM_FUNC_ABORT_TASK:
+		se_tmr->response = iscsit_tmr_abort_task(cmd, buf);
+		if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) {
+			cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+			goto attach;
+		}
+		break;
+	case ISCSI_TM_FUNC_ABORT_TASK_SET:
+	case ISCSI_TM_FUNC_CLEAR_ACA:
+	case ISCSI_TM_FUNC_CLEAR_TASK_SET:
+	case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+		break;
+	case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+		if (iscsit_tmr_task_warm_reset(conn, tmr_req, buf) < 0) {
+			cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+			se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED;
+			goto attach;
+		}
+		break;
+	case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+		if (iscsit_tmr_task_cold_reset(conn, tmr_req, buf) < 0) {
+			cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+			se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED;
+			goto attach;
+		}
+		break;
+	case ISCSI_TM_FUNC_TASK_REASSIGN:
+		se_tmr->response = iscsit_tmr_task_reassign(cmd, buf);
+		/*
+		 * Perform sanity checks on the ExpDataSN only if the
+		 * TASK_REASSIGN was successful.
+		 */
+		if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE)
+			break;
+
+		if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0)
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_BOOKMARK_INVALID, 1, 1,
+					buf, cmd);
+		break;
+	default:
+		pr_err("Unknown TMR function: 0x%02x, protocol"
+			" error.\n", function);
+		cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+		se_tmr->response = ISCSI_TMF_RSP_NOT_SUPPORTED;
+		goto attach;
+	}
+
+	if ((function != ISCSI_TM_FUNC_TASK_REASSIGN) &&
+	    (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+		se_tmr->call_transport = 1;
+attach:
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+		int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP)
+			out_of_order_cmdsn = 1;
+		else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+			return 0;
+		} else { /* (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) */
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_PROTOCOL_ERROR,
+					1, 0, buf, cmd);
+		}
+	}
+	iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+	if (out_of_order_cmdsn)
+		return 0;
+	/*
+	 * Found the referenced task, send to transport for processing.
+	 */
+	if (se_tmr->call_transport)
+		return transport_generic_handle_tmr(&cmd->se_cmd);
+
+	/*
+	 * Either the referenced LUN or task could not be found, or the
+	 * Task Management command was not authorized or supported.  Change
+	 * state and let the tx_thread send the response.
+	 *
+	 * For connection recovery, this is also the default action for
+	 * TMR TASK_REASSIGN.
+	 */
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+	return 0;
+}
+
+/* #warning FIXME: Support Text Command parameters besides SendTargets */
+static int iscsit_handle_text_cmd(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	char *text_ptr, *text_in;
+	int cmdsn_ret, niov = 0, rx_got, rx_size;
+	u32 checksum = 0, data_crc = 0, payload_length;
+	u32 padding = 0, text_length = 0;
+	u8 pad_bytes[4] = { 0 }; /* cmd is not yet allocated when padding is received */
+	struct iscsi_cmd *cmd;
+	struct kvec iov[3];
+	struct iscsi_text *hdr;
+
+	hdr			= (struct iscsi_text *) buf;
+	payload_length		= ntoh24(hdr->dlength);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->ttt		= be32_to_cpu(hdr->ttt);
+	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
+	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
+
+	if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+		pr_err("Unable to accept text parameter length: %u"
+			"greater than MaxRecvDataSegmentLength %u.\n",
+		       payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x,"
+		" ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn,
+		hdr->exp_statsn, payload_length);
+
+	rx_size = text_length = payload_length;
+	if (text_length) {
+		text_in = kzalloc(text_length, GFP_KERNEL);
+		if (!text_in) {
+			pr_err("Unable to allocate memory for"
+				" incoming text parameters\n");
+			return -1;
+		}
+
+		memset(iov, 0, 3 * sizeof(struct kvec));
+		iov[niov].iov_base	= text_in;
+		iov[niov++].iov_len	= text_length;
+
+		padding = ((-payload_length) & 3);
+		if (padding != 0) {
+			iov[niov].iov_base = pad_bytes;
+			iov[niov++].iov_len  = padding;
+			rx_size += padding;
+			pr_debug("Receiving %u additional bytes"
+					" for padding.\n", padding);
+		}
+		if (conn->conn_ops->DataDigest) {
+			iov[niov].iov_base	= &checksum;
+			iov[niov++].iov_len	= ISCSI_CRC_LEN;
+			rx_size += ISCSI_CRC_LEN;
+		}
+
+		rx_got = rx_data(conn, &iov[0], niov, rx_size);
+		if (rx_got != rx_size) {
+			kfree(text_in);
+			return -1;
+		}
+
+		if (conn->conn_ops->DataDigest) {
+			iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+					text_in, text_length,
+					padding, pad_bytes,
+					(u8 *)&data_crc);
+
+			if (checksum != data_crc) {
+				pr_err("Text data CRC32C DataDigest"
+					" 0x%08x does not match computed"
+					" 0x%08x\n", checksum, data_crc);
+				if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+					pr_err("Unable to recover from"
+					" Text Data digest failure while in"
+						" ERL=0.\n");
+					kfree(text_in);
+					return -1;
+				} else {
+					/*
+					 * Silently drop this PDU and let the
+					 * initiator plug the CmdSN gap.
+					 */
+					pr_debug("Dropping Text"
+					" Command CmdSN: 0x%08x due to"
+					" DataCRC error.\n", hdr->cmdsn);
+					kfree(text_in);
+					return 0;
+				}
+			} else {
+				pr_debug("Got CRC32C DataDigest"
+					" 0x%08x for %u bytes of text data.\n",
+						checksum, text_length);
+			}
+		}
+		text_in[text_length - 1] = '\0';
+		pr_debug("Successfully read %d bytes of text"
+				" data.\n", text_length);
+
+		if (strncmp("SendTargets", text_in, 11) != 0) {
+			pr_err("Received Text Data that is not"
+				" SendTargets, cannot continue.\n");
+			kfree(text_in);
+			return -1;
+		}
+		text_ptr = strchr(text_in, '=');
+		if (!text_ptr) {
+			pr_err("No \"=\" separator found in Text Data,"
+				"  cannot continue.\n");
+			kfree(text_in);
+			return -1;
+		}
+		if (strncmp("=All", text_ptr, 4) != 0) {
+			pr_err("Unable to locate All value for"
+				" SendTargets key,  cannot continue.\n");
+			kfree(text_in);
+			return -1;
+		}
+/*#warning Support SendTargets=(iSCSI Target Name/Nothing) values. */
+		kfree(text_in);
+	}
+
+	cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+	if (!cmd)
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+					1, buf, conn);
+
+	cmd->iscsi_opcode	= ISCSI_OP_TEXT;
+	cmd->i_state		= ISTATE_SEND_TEXTRSP;
+	cmd->immediate_cmd	= ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+	conn->sess->init_task_tag = cmd->init_task_tag	= hdr->itt;
+	cmd->targ_xfer_tag	= 0xFFFFFFFF;
+	cmd->cmd_sn		= hdr->cmdsn;
+	cmd->exp_stat_sn	= hdr->exp_statsn;
+	cmd->data_direction	= DMA_NONE;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+	if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+		cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_PROTOCOL_ERROR,
+					1, 0, buf, cmd);
+
+		return 0;
+	}
+
+	return iscsit_execute_cmd(cmd, 0);
+}
+
+int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+	struct iscsi_conn *conn_p;
+	struct iscsi_session *sess = conn->sess;
+
+	pr_debug("Received logout request CLOSESESSION on CID: %hu"
+		" for SID: %u.\n", conn->cid, conn->sess->sid);
+
+	atomic_set(&sess->session_logout, 1);
+	atomic_set(&conn->conn_logout_remove, 1);
+	conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_SESSION;
+
+	iscsit_inc_conn_usage_count(conn);
+	iscsit_inc_session_usage_count(sess);
+
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) {
+		if (conn_p->conn_state != TARG_CONN_STATE_LOGGED_IN)
+			continue;
+
+		pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+		conn_p->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	return 0;
+}
+
+int iscsit_logout_closeconnection(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+	struct iscsi_conn *l_conn;
+	struct iscsi_session *sess = conn->sess;
+
+	pr_debug("Received logout request CLOSECONNECTION for CID:"
+		" %hu on CID: %hu.\n", cmd->logout_cid, conn->cid);
+
+	/*
+	 * A Logout Request with a CLOSECONNECTION reason code for a CID
+	 * can arrive on a connection with a differing CID.
+	 */
+	if (conn->cid == cmd->logout_cid) {
+		spin_lock_bh(&conn->state_lock);
+		pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+		conn->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+
+		atomic_set(&conn->conn_logout_remove, 1);
+		conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_CONNECTION;
+		iscsit_inc_conn_usage_count(conn);
+
+		spin_unlock_bh(&conn->state_lock);
+	} else {
+		/*
+		 * Handle all different-CID CLOSECONNECTION requests in
+		 * iscsit_logout_post_handler_diffcid() so as to give enough
+		 * time for any non-immediate command's CmdSN to be
+		 * acknowledged on the connection in question.
+		 *
+		 * Here we simply make sure the CID is still around.
+		 */
+		l_conn = iscsit_get_conn_from_cid(sess,
+				cmd->logout_cid);
+		if (!l_conn) {
+			cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND;
+			iscsit_add_cmd_to_response_queue(cmd, conn,
+					cmd->i_state);
+			return 0;
+		}
+
+		iscsit_dec_conn_usage_count(l_conn);
+	}
+
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	return 0;
+}
+
+int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+
+	pr_debug("Received explicit REMOVECONNFORRECOVERY logout for"
+		" CID: %hu on CID: %hu.\n", cmd->logout_cid, conn->cid);
+
+	if (sess->sess_ops->ErrorRecoveryLevel != 2) {
+		pr_err("Received Logout Request REMOVECONNFORRECOVERY"
+			" while ERL!=2.\n");
+		cmd->logout_response = ISCSI_LOGOUT_RECOVERY_UNSUPPORTED;
+		iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+		return 0;
+	}
+
+	if (conn->cid == cmd->logout_cid) {
+		pr_err("Received Logout Request REMOVECONNFORRECOVERY"
+			" with CID: %hu on CID: %hu, implementation error.\n",
+				cmd->logout_cid, conn->cid);
+		cmd->logout_response = ISCSI_LOGOUT_CLEANUP_FAILED;
+		iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+		return 0;
+	}
+
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	return 0;
+}
+
+static int iscsit_handle_logout_cmd(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	int cmdsn_ret, logout_remove = 0;
+	u8 reason_code = 0;
+	struct iscsi_cmd *cmd;
+	struct iscsi_logout *hdr;
+	struct iscsi_tiqn *tiqn = iscsit_snmp_get_tiqn(conn);
+
+	hdr			= (struct iscsi_logout *) buf;
+	reason_code		= (hdr->flags & 0x7f);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->cid		= be16_to_cpu(hdr->cid);
+	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
+	hdr->exp_statsn	= be32_to_cpu(hdr->exp_statsn);
+
+	if (tiqn) {
+		spin_lock(&tiqn->logout_stats.lock);
+		if (reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION)
+			tiqn->logout_stats.normal_logouts++;
+		else
+			tiqn->logout_stats.abnormal_logouts++;
+		spin_unlock(&tiqn->logout_stats.lock);
+	}
+
+	pr_debug("Got Logout Request ITT: 0x%08x CmdSN: 0x%08x"
+		" ExpStatSN: 0x%08x Reason: 0x%02x CID: %hu on CID: %hu\n",
+		hdr->itt, hdr->cmdsn, hdr->exp_statsn, reason_code,
+		hdr->cid, conn->cid);
+
+	if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) {
+		pr_err("Received logout request on connection that"
+			" is not in logged in state, ignoring request.\n");
+		return 0;
+	}
+
+	cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+	if (!cmd)
+		return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1,
+					buf, conn);
+
+	cmd->iscsi_opcode       = ISCSI_OP_LOGOUT;
+	cmd->i_state            = ISTATE_SEND_LOGOUTRSP;
+	cmd->immediate_cmd      = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+	conn->sess->init_task_tag = cmd->init_task_tag  = hdr->itt;
+	cmd->targ_xfer_tag      = 0xFFFFFFFF;
+	cmd->cmd_sn             = hdr->cmdsn;
+	cmd->exp_stat_sn        = hdr->exp_statsn;
+	cmd->logout_cid         = hdr->cid;
+	cmd->logout_reason      = reason_code;
+	cmd->data_direction     = DMA_NONE;
+
+	/*
+	 * We need to sleep in these cases (by returning 1) until the Logout
+	 * Response gets sent in the tx thread.
+	 */
+	if ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) ||
+	   ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) &&
+	    (hdr->cid == conn->cid)))
+		logout_remove = 1;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	if (reason_code != ISCSI_LOGOUT_REASON_RECOVERY)
+		iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+	/*
+	 * Immediate commands are executed, well, immediately.
+	 * Non-Immediate Logout Commands are executed in CmdSN order.
+	 */
+	if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+		int ret = iscsit_execute_cmd(cmd, 0);
+
+		if (ret < 0)
+			return ret;
+	} else {
+		cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+			logout_remove = 0;
+		} else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) {
+			return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_PROTOCOL_ERROR,
+				1, 0, buf, cmd);
+		}
+	}
+
+	return logout_remove;
+}
+
+static int iscsit_handle_snack(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	u32 unpacked_lun;
+	u64 lun;
+	struct iscsi_snack *hdr;
+
+	hdr			= (struct iscsi_snack *) buf;
+	hdr->flags		&= ~ISCSI_FLAG_CMD_FINAL;
+	lun			= get_unaligned_le64(&hdr->lun);
+	unpacked_lun		= scsilun_to_int((struct scsi_lun *)&lun);
+	hdr->itt		= be32_to_cpu(hdr->itt);
+	hdr->ttt		= be32_to_cpu(hdr->ttt);
+	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
+	hdr->begrun		= be32_to_cpu(hdr->begrun);
+	hdr->runlength		= be32_to_cpu(hdr->runlength);
+
+	pr_debug("Got ISCSI_INIT_SNACK, ITT: 0x%08x, ExpStatSN:"
+		" 0x%08x, Type: 0x%02x, BegRun: 0x%08x, RunLength: 0x%08x,"
+		" CID: %hu\n", hdr->itt, hdr->exp_statsn, hdr->flags,
+			hdr->begrun, hdr->runlength, conn->cid);
+
+	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+		pr_err("Initiator sent SNACK request while in"
+			" ErrorRecoveryLevel=0.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+	/*
+	 * SNACK_DATA and SNACK_R2T are both 0, so check which function to
+	 * call from inside iscsi_send_recovery_datain_or_r2t().
+	 */
+	switch (hdr->flags & ISCSI_FLAG_SNACK_TYPE_MASK) {
+	case 0:
+		return iscsit_handle_recovery_datain_or_r2t(conn, buf,
+			hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
+	case ISCSI_FLAG_SNACK_TYPE_STATUS:
+		return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
+			hdr->begrun, hdr->runlength);
+	case ISCSI_FLAG_SNACK_TYPE_DATA_ACK:
+		return iscsit_handle_data_ack(conn, hdr->ttt, hdr->begrun,
+			hdr->runlength);
+	case ISCSI_FLAG_SNACK_TYPE_RDATA:
+		/* FIXME: Support R-Data SNACK */
+		pr_err("R-Data SNACK Not Supported.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	default:
+		pr_err("Unknown SNACK type 0x%02x, protocol"
+			" error.\n", hdr->flags & 0x0f);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	return 0;
+}
+
+static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+	if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) ||
+	    (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) {
+		wait_for_completion_interruptible_timeout(
+					&conn->rx_half_close_comp,
+					ISCSI_RX_THREAD_TCP_TIMEOUT * HZ);
+	}
+}
+
+static int iscsit_handle_immediate_data(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf,
+	u32 length)
+{
+	int iov_ret, rx_got = 0, rx_size = 0;
+	u32 checksum, iov_count = 0, padding = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct kvec *iov;
+
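+	/*
+	 * Map the command's data buffer at the current write offset so the
+	 * immediate data is received directly into place.
+	 */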
+	iov_ret = iscsit_map_iovec(cmd, cmd->iov_data, cmd->write_data_done, length);
+	if (iov_ret < 0)
+		return IMMEDIATE_DATA_CANNOT_RECOVER;
+
+	rx_size = length;
+	iov_count = iov_ret;
+	iov = &cmd->iov_data[0];
+
+	padding = ((-length) & 3);
+	if (padding != 0) {
+		iov[iov_count].iov_base	= cmd->pad_bytes;
+		iov[iov_count++].iov_len = padding;
+		rx_size += padding;
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		iov[iov_count].iov_base		= &checksum;
+		iov[iov_count++].iov_len	= ISCSI_CRC_LEN;
+		rx_size += ISCSI_CRC_LEN;
+	}
+
+	rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
+	iscsit_unmap_iovec(cmd);
+
+	if (rx_got != rx_size) {
+		iscsit_rx_thread_wait_for_tcp(conn);
+		return IMMEDIATE_DATA_CANNOT_RECOVER;
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		u32 data_crc;
+
+		data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+						    cmd->write_data_done, length, padding,
+						    cmd->pad_bytes);
+
+		if (checksum != data_crc) {
+			pr_err("ImmediateData CRC32C DataDigest 0x%08x"
+				" does not match computed 0x%08x\n", checksum,
+				data_crc);
+
+			if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+				pr_err("Unable to recover from"
+					" Immediate Data digest failure while"
+					" in ERL=0.\n");
+				iscsit_add_reject_from_cmd(
+						ISCSI_REASON_DATA_DIGEST_ERROR,
+						1, 0, buf, cmd);
+				return IMMEDIATE_DATA_CANNOT_RECOVER;
+			} else {
+				iscsit_add_reject_from_cmd(
+						ISCSI_REASON_DATA_DIGEST_ERROR,
+						0, 0, buf, cmd);
+				return IMMEDIATE_DATA_ERL1_CRC_FAILURE;
+			}
+		} else {
+			pr_debug("Got CRC32C DataDigest 0x%08x for"
+				" %u bytes of Immediate Data\n", checksum,
+				length);
+		}
+	}
+
+	cmd->write_data_done += length;
+
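+	/*
+	 * If the immediate data covered the entire transfer, mark the last
+	 * DataOut as received.
+	 */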
+	if (cmd->write_data_done == cmd->data_length) {
+		spin_lock_bh(&cmd->istate_lock);
+		cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+		cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+		spin_unlock_bh(&cmd->istate_lock);
+	}
+
+	return IMMEDIATE_DATA_NORMAL_OPERATION;
+}
+
+/*
+ *	Called with sess->conn_lock held.
+ */
+/* #warning iscsi_build_conn_drop_async_message() only sends out on connections
+	with active network interface */
+static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn)
+{
+	struct iscsi_cmd *cmd;
+	struct iscsi_conn *conn_p;
+	bool found = false;
+
+	/*
+	 * Only send an Asynchronous Message on connections whose network
+	 * interface is still functional.
+	 */
+	list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) {
+		if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) {
+			iscsit_inc_conn_usage_count(conn_p);
+			found = true;
+			break;
+		}
+	}
+
+	/*
+	 * list_for_each_entry() never leaves conn_p NULL on a full pass,
+	 * so track explicitly whether a logged-in connection was found.
+	 */
+	if (!found)
+		return;
+
+	cmd = iscsit_allocate_cmd(conn_p, GFP_KERNEL);
+	if (!cmd) {
+		iscsit_dec_conn_usage_count(conn_p);
+		return;
+	}
+
+	cmd->logout_cid = conn->cid;
+	cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+	cmd->i_state = ISTATE_SEND_ASYNCMSG;
+
+	spin_lock_bh(&conn_p->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn_p->conn_cmd_list);
+	spin_unlock_bh(&conn_p->cmd_lock);
+
+	iscsit_add_cmd_to_response_queue(cmd, conn_p, cmd->i_state);
+	iscsit_dec_conn_usage_count(conn_p);
+}
+
+static int iscsit_send_conn_drop_async_message(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_async *hdr;
+
+	cmd->tx_size = ISCSI_HDR_LEN;
+	cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+
+	hdr			= (struct iscsi_async *) cmd->pdu;
+	hdr->opcode		= ISCSI_OP_ASYNC_EVENT;
+	hdr->flags		= ISCSI_FLAG_CMD_FINAL;
+	cmd->init_task_tag	= 0xFFFFFFFF;
+	cmd->targ_xfer_tag	= 0xFFFFFFFF;
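+	/* No LUN applies to a connection-drop AsyncEvent; fill with ones. */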
+	put_unaligned_be64(0xFFFFFFFFFFFFFFFFULL, &hdr->rsvd4[0]);
+	cmd->stat_sn		= conn->stat_sn++;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+	hdr->async_event	= ISCSI_ASYNC_MSG_DROPPING_CONNECTION;
+	hdr->param1		= cpu_to_be16(cmd->logout_cid);
+	hdr->param2		= cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait);
+	hdr->param3		= cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain);
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		cmd->tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32C HeaderDigest to"
+			" Async Message 0x%08x\n", *header_digest);
+	}
+
+	cmd->iov_misc[0].iov_base	= cmd->pdu;
+	cmd->iov_misc[0].iov_len	= cmd->tx_size;
+	cmd->iov_misc_count		= 1;
+
+	pr_debug("Sending Connection Dropped Async Message StatSN:"
+		" 0x%08x, for CID: %hu on CID: %hu\n", cmd->stat_sn,
+			cmd->logout_cid, conn->cid);
+	return 0;
+}
+
+static int iscsit_send_data_in(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	int *eodr)
+{
+	int iov_ret = 0, set_statsn = 0;
+	u32 iov_count = 0, tx_size = 0;
+	struct iscsi_datain datain;
+	struct iscsi_datain_req *dr;
+	struct iscsi_data_rsp *hdr;
+	struct kvec *iov;
+
+	memset(&datain, 0, sizeof(struct iscsi_datain));
+	dr = iscsit_get_datain_values(cmd, &datain);
+	if (!dr) {
+		pr_err("iscsit_get_datain_values failed for ITT: 0x%08x\n",
+				cmd->init_task_tag);
+		return -1;
+	}
+
+	/*
+	 * Be paranoid and double check the logic for now.
+	 */
+	if ((datain.offset + datain.length) > cmd->data_length) {
+		pr_err("Command ITT: 0x%08x, datain.offset: %u and"
+			" datain.length: %u exceeds cmd->data_length: %u\n",
+			cmd->init_task_tag, datain.offset, datain.length,
+				cmd->data_length);
+		return -1;
+	}
+
+	spin_lock_bh(&conn->sess->session_stats_lock);
+	conn->sess->tx_data_octets += datain.length;
+	if (conn->sess->se_sess->se_node_acl) {
+		spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+		conn->sess->se_sess->se_node_acl->read_bytes += datain.length;
+		spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+	}
+	spin_unlock_bh(&conn->sess->session_stats_lock);
+	/*
+	 * Special case for successful execution w/ both DATAIN
+	 * and Sense Data.
+	 */
+	if ((datain.flags & ISCSI_FLAG_DATA_STATUS) &&
+	    (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE))
+		datain.flags &= ~ISCSI_FLAG_DATA_STATUS;
+	else {
+		if ((dr->dr_complete == DATAIN_COMPLETE_NORMAL) ||
+		    (dr->dr_complete == DATAIN_COMPLETE_CONNECTION_RECOVERY)) {
+			iscsit_increment_maxcmdsn(cmd, conn->sess);
+			cmd->stat_sn = conn->stat_sn++;
+			set_statsn = 1;
+		} else if (dr->dr_complete ==
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY)
+			set_statsn = 1;
+	}
+
+	hdr	= (struct iscsi_data_rsp *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_SCSI_DATA_IN;
+	hdr->flags		= datain.flags;
+	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+		if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+			hdr->flags |= ISCSI_FLAG_DATA_OVERFLOW;
+			hdr->residual_count = cpu_to_be32(cmd->residual_count);
+		} else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+			hdr->flags |= ISCSI_FLAG_DATA_UNDERFLOW;
+			hdr->residual_count = cpu_to_be32(cmd->residual_count);
+		}
+	}
+	hton24(hdr->dlength, datain.length);
+	if (hdr->flags & ISCSI_FLAG_DATA_ACK)
+		int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+				(struct scsi_lun *)&hdr->lun);
+	else
+		put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	hdr->ttt		= (hdr->flags & ISCSI_FLAG_DATA_ACK) ?
+				   cpu_to_be32(cmd->targ_xfer_tag) :
+				   0xFFFFFFFF;
+	hdr->statsn		= (set_statsn) ? cpu_to_be32(cmd->stat_sn) :
+						0xFFFFFFFF;
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+	hdr->datasn		= cpu_to_be32(datain.data_sn);
+	hdr->offset		= cpu_to_be32(datain.offset);
+
+	iov = &cmd->iov_data[0];
+	iov[iov_count].iov_base	= cmd->pdu;
+	iov[iov_count++].iov_len	= ISCSI_HDR_LEN;
+	tx_size += ISCSI_HDR_LEN;
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		iov[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+
+		pr_debug("Attaching CRC32 HeaderDigest"
+			" for DataIN PDU 0x%08x\n", *header_digest);
+	}
+
+	iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], datain.offset, datain.length);
+	if (iov_ret < 0)
+		return -1;
+
+	iov_count += iov_ret;
+	tx_size += datain.length;
+
+	cmd->padding = ((-datain.length) & 3);
+	if (cmd->padding) {
+		iov[iov_count].iov_base		= cmd->pad_bytes;
+		iov[iov_count++].iov_len	= cmd->padding;
+		tx_size += cmd->padding;
+
+		pr_debug("Attaching %u padding bytes\n",
+				cmd->padding);
+	}
+	if (conn->conn_ops->DataDigest) {
+		cmd->data_crc = iscsit_do_crypto_hash_sg(&conn->conn_tx_hash, cmd,
+			 datain.offset, datain.length, cmd->padding, cmd->pad_bytes);
+
+		iov[iov_count].iov_base	= &cmd->data_crc;
+		iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+
+		pr_debug("Attached CRC32C DataDigest %d bytes, crc"
+			" 0x%08x\n", datain.length+cmd->padding, cmd->data_crc);
+	}
+
+	cmd->iov_data_count = iov_count;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Built DataIN ITT: 0x%08x, StatSN: 0x%08x,"
+		" DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n",
+		cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn),
+		ntohl(hdr->offset), datain.length, conn->cid);
+
+	if (dr->dr_complete) {
+		*eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ?
+				2 : 1;
+		iscsit_free_datain_req(cmd, dr);
+	}
+
+	return 0;
+}
+
+static int iscsit_send_logout_response(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	int niov = 0, tx_size;
+	struct iscsi_conn *logout_conn = NULL;
+	struct iscsi_conn_recovery *cr = NULL;
+	struct iscsi_session *sess = conn->sess;
+	struct kvec *iov;
+	struct iscsi_logout_rsp *hdr;
+	/*
+	 * The actual shutting down of Sessions and/or Connections
+	 * for CLOSESESSION and CLOSECONNECTION Logout Requests
+	 * is done in iscsit_logout_post_handler().
+	 */
+	switch (cmd->logout_reason) {
+	case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+		pr_debug("iSCSI session logout successful, setting"
+			" logout response to ISCSI_LOGOUT_SUCCESS.\n");
+		cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+		break;
+	case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+		if (cmd->logout_response == ISCSI_LOGOUT_CID_NOT_FOUND)
+			break;
+		/*
+		 * For CLOSECONNECTION logout requests carrying
+		 * a matching logout CID -> local CID, the reference
+		 * for the local CID will have been incremented in
+		 * iscsit_logout_closeconnection().
+		 *
+		 * For CLOSECONNECTION logout requests carrying
+		 * a different CID than the connection it arrived
+		 * on, the connection responding to cmd->logout_cid
+		 * is stopped in iscsit_logout_post_handler_diffcid().
+		 */
+
+		pr_debug("iSCSI CID: %hu logout on CID: %hu"
+			" successful.\n", cmd->logout_cid, conn->cid);
+		cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+		break;
+	case ISCSI_LOGOUT_REASON_RECOVERY:
+		if ((cmd->logout_response == ISCSI_LOGOUT_RECOVERY_UNSUPPORTED) ||
+		    (cmd->logout_response == ISCSI_LOGOUT_CLEANUP_FAILED))
+			break;
+		/*
+		 * If the connection is still active from our point of view,
+		 * force connection recovery to occur.
+		 */
+		logout_conn = iscsit_get_conn_from_cid_rcfr(sess,
+				cmd->logout_cid);
+		if (logout_conn) {
+			iscsit_connection_reinstatement_rcfr(logout_conn);
+			iscsit_dec_conn_usage_count(logout_conn);
+		}
+
+		cr = iscsit_get_inactive_connection_recovery_entry(
+				conn->sess, cmd->logout_cid);
+		if (!cr) {
+			pr_err("Unable to locate CID: %hu for"
+			" REMOVECONNFORRECOVERY Logout Request.\n",
+				cmd->logout_cid);
+			cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND;
+			break;
+		}
+
+		iscsit_discard_cr_cmds_by_expstatsn(cr, cmd->exp_stat_sn);
+
+		pr_debug("iSCSI REMOVECONNFORRECOVERY logout"
+			" for recovery for CID: %hu on CID: %hu successful.\n",
+				cmd->logout_cid, conn->cid);
+		cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+		break;
+	default:
+		pr_err("Unknown cmd->logout_reason: 0x%02x\n",
+				cmd->logout_reason);
+		return -1;
+	}
+
+	tx_size = ISCSI_HDR_LEN;
+	hdr			= (struct iscsi_logout_rsp *)cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_LOGOUT_RSP;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	hdr->response		= cmd->logout_response;
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	cmd->stat_sn		= conn->stat_sn++;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+
+	iscsit_increment_maxcmdsn(cmd, conn->sess);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	iov = &cmd->iov_misc[0];
+	iov[niov].iov_base	= cmd->pdu;
+	iov[niov++].iov_len	= ISCSI_HDR_LEN;
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		iov[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32C HeaderDigest to"
+			" Logout Response 0x%08x\n", *header_digest);
+	}
+	cmd->iov_misc_count = niov;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Sending Logout Response ITT: 0x%08x StatSN:"
+		" 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n",
+		cmd->init_task_tag, cmd->stat_sn, hdr->response,
+		cmd->logout_cid, conn->cid);
+
+	return 0;
+}
+
+/*
+ *	Unsolicited NOPIN, either requesting a response or not.
+ */
+static int iscsit_send_unsolicited_nopin(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	int want_response)
+{
+	int tx_size = ISCSI_HDR_LEN;
+	struct iscsi_nopin *hdr;
+
+	hdr			= (struct iscsi_nopin *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_NOOP_IN;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	hdr->ttt		= cpu_to_be32(cmd->targ_xfer_tag);
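+	/* Note: an unsolicited NopIN does not advance StatSN. */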
+	cmd->stat_sn		= conn->stat_sn;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32C HeaderDigest to"
+			" NopIN 0x%08x\n", *header_digest);
+	}
+
+	cmd->iov_misc[0].iov_base	= cmd->pdu;
+	cmd->iov_misc[0].iov_len	= tx_size;
+	cmd->iov_misc_count	= 1;
+	cmd->tx_size		= tx_size;
+
+	pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:"
+		" 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid);
+
+	return 0;
+}
+
+static int iscsit_send_nopin_response(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	int niov = 0, tx_size;
+	u32 padding = 0;
+	struct kvec *iov;
+	struct iscsi_nopin *hdr;
+
+	tx_size = ISCSI_HDR_LEN;
+	hdr			= (struct iscsi_nopin *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_NOOP_IN;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	hton24(hdr->dlength, cmd->buf_ptr_size);
+	put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	hdr->ttt		= cpu_to_be32(cmd->targ_xfer_tag);
+	cmd->stat_sn		= conn->stat_sn++;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+
+	iscsit_increment_maxcmdsn(cmd, conn->sess);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	iov = &cmd->iov_misc[0];
+	iov[niov].iov_base	= cmd->pdu;
+	iov[niov++].iov_len	= ISCSI_HDR_LEN;
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		iov[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32C HeaderDigest"
+			" to NopIn 0x%08x\n", *header_digest);
+	}
+
+	/*
+	 * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr.
+	 * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size.
+	 */
+	if (cmd->buf_ptr_size) {
+		iov[niov].iov_base	= cmd->buf_ptr;
+		iov[niov++].iov_len	= cmd->buf_ptr_size;
+		tx_size += cmd->buf_ptr_size;
+
+		pr_debug("Echoing back %u bytes of ping"
+			" data.\n", cmd->buf_ptr_size);
+
+		padding = ((-cmd->buf_ptr_size) & 3);
+		if (padding != 0) {
+			iov[niov].iov_base = &cmd->pad_bytes;
+			iov[niov++].iov_len = padding;
+			tx_size += padding;
+			pr_debug("Attaching %u additional"
+				" padding bytes.\n", padding);
+		}
+		if (conn->conn_ops->DataDigest) {
+			iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				cmd->buf_ptr, cmd->buf_ptr_size,
+				padding, (u8 *)&cmd->pad_bytes,
+				(u8 *)&cmd->data_crc);
+
+			iov[niov].iov_base = &cmd->data_crc;
+			iov[niov++].iov_len = ISCSI_CRC_LEN;
+			tx_size += ISCSI_CRC_LEN;
+			pr_debug("Attached DataDigest for %u"
+				" bytes of ping data, CRC 0x%08x\n",
+				cmd->buf_ptr_size, cmd->data_crc);
+		}
+	}
+
+	cmd->iov_misc_count = niov;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Sending NOPIN Response ITT: 0x%08x, TTT:"
+		" 0x%08x, StatSN: 0x%08x, Length %u\n", cmd->init_task_tag,
+		cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size);
+
+	return 0;
+}
+
+int iscsit_send_r2t(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	int tx_size = 0;
+	struct iscsi_r2t *r2t;
+	struct iscsi_r2t_rsp *hdr;
+
+	r2t = iscsit_get_r2t_from_list(cmd);
+	if (!r2t)
+		return -1;
+
+	hdr			= (struct iscsi_r2t_rsp *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_R2T;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+			(struct scsi_lun *)&hdr->lun);
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
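+	/*
+	 * Allocate the next TTT from the session-wide counter, skipping
+	 * the reserved value 0xFFFFFFFF on wrap.
+	 */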
+	spin_lock_bh(&conn->sess->ttt_lock);
+	r2t->targ_xfer_tag	= conn->sess->targ_xfer_tag++;
+	if (r2t->targ_xfer_tag == 0xFFFFFFFF)
+		r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+	spin_unlock_bh(&conn->sess->ttt_lock);
+	hdr->ttt		= cpu_to_be32(r2t->targ_xfer_tag);
+	hdr->statsn		= cpu_to_be32(conn->stat_sn);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+	hdr->r2tsn		= cpu_to_be32(r2t->r2t_sn);
+	hdr->data_offset	= cpu_to_be32(r2t->offset);
+	hdr->data_length	= cpu_to_be32(r2t->xfer_len);
+
+	cmd->iov_misc[0].iov_base	= cmd->pdu;
+	cmd->iov_misc[0].iov_len	= ISCSI_HDR_LEN;
+	tx_size += ISCSI_HDR_LEN;
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32 HeaderDigest for R2T"
+			" PDU 0x%08x\n", *header_digest);
+	}
+
+	pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:"
+		" 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n",
+		(!r2t->recovery_r2t) ? "" : "Recovery ", cmd->init_task_tag,
+		r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn,
+			r2t->offset, r2t->xfer_len, conn->cid);
+
+	cmd->iov_misc_count = 1;
+	cmd->tx_size = tx_size;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	r2t->sent_r2t = 1;
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	return 0;
+}
+
+/*
+ *	type 0: Normal Operation.
+ *	type 1: Called from Storage Transport.
+ *	type 2: Called from iscsi_task_reassign_complete_write() for
+ *	        connection recovery.
+ */
+int iscsit_build_r2ts_for_cmd(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	int type)
+{
+	int first_r2t = 1;
+	u32 offset = 0, xfer_len = 0;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	if (cmd->cmd_flags & ICF_SENT_LAST_R2T) {
+		spin_unlock_bh(&cmd->r2t_lock);
+		return 0;
+	}
+
+	if (conn->sess->sess_ops->DataSequenceInOrder && (type != 2))
+		if (cmd->r2t_offset < cmd->write_data_done)
+			cmd->r2t_offset = cmd->write_data_done;
+
+	while (cmd->outstanding_r2ts < conn->sess->sess_ops->MaxOutstandingR2T) {
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			offset = cmd->r2t_offset;
+
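+			/*
+			 * For the first R2T of a connection recovery
+			 * (type 2), only request what remains of the
+			 * current burst; otherwise request a full
+			 * MaxBurstLength, capped at the remaining data.
+			 */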
+			if (first_r2t && (type == 2)) {
+				xfer_len = ((offset +
+					     (conn->sess->sess_ops->MaxBurstLength -
+					     cmd->next_burst_len) >
+					     cmd->data_length) ?
+					    (cmd->data_length - offset) :
+					    (conn->sess->sess_ops->MaxBurstLength -
+					     cmd->next_burst_len));
+			} else {
+				xfer_len = ((offset +
+					     conn->sess->sess_ops->MaxBurstLength) >
+					     cmd->data_length) ?
+					     (cmd->data_length - offset) :
+					     conn->sess->sess_ops->MaxBurstLength;
+			}
+			cmd->r2t_offset += xfer_len;
+
+			if (cmd->r2t_offset == cmd->data_length)
+				cmd->cmd_flags |= ICF_SENT_LAST_R2T;
+		} else {
+			struct iscsi_seq *seq;
+
+			seq = iscsit_get_seq_holder_for_r2t(cmd);
+			if (!seq) {
+				spin_unlock_bh(&cmd->r2t_lock);
+				return -1;
+			}
+
+			offset = seq->offset;
+			xfer_len = seq->xfer_len;
+
+			if (cmd->seq_send_order == cmd->seq_count)
+				cmd->cmd_flags |= ICF_SENT_LAST_R2T;
+		}
+		cmd->outstanding_r2ts++;
+		first_r2t = 0;
+
+		if (iscsit_add_r2t_to_list(cmd, offset, xfer_len, 0, 0) < 0) {
+			spin_unlock_bh(&cmd->r2t_lock);
+			return -1;
+		}
+
+		if (cmd->cmd_flags & ICF_SENT_LAST_R2T)
+			break;
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	return 0;
+}
+
+static int iscsit_send_status(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	u8 iov_count = 0, recovery;
+	u32 padding = 0, tx_size = 0;
+	struct iscsi_scsi_rsp *hdr;
+	struct kvec *iov;
+
+	recovery = (cmd->i_state != ISTATE_SEND_STATUS);
+	if (!recovery)
+		cmd->stat_sn = conn->stat_sn++;
+
+	spin_lock_bh(&conn->sess->session_stats_lock);
+	conn->sess->rsp_pdus++;
+	spin_unlock_bh(&conn->sess->session_stats_lock);
+
+	hdr			= (struct iscsi_scsi_rsp *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_SCSI_CMD_RSP;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+		hdr->flags |= ISCSI_FLAG_CMD_OVERFLOW;
+		hdr->residual_count = cpu_to_be32(cmd->residual_count);
+	} else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+		hdr->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+		hdr->residual_count = cpu_to_be32(cmd->residual_count);
+	}
+	hdr->response		= cmd->iscsi_response;
+	hdr->cmd_status		= cmd->se_cmd.scsi_status;
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+
+	iscsit_increment_maxcmdsn(cmd, conn->sess);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	iov = &cmd->iov_misc[0];
+	iov[iov_count].iov_base	= cmd->pdu;
+	iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+	tx_size += ISCSI_HDR_LEN;
+
+	/*
+	 * Attach SENSE DATA payload to iSCSI Response PDU
+	 */
+	if (cmd->se_cmd.sense_buffer &&
+	   ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
+	    (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
+		padding		= -(cmd->se_cmd.scsi_sense_length) & 3;
+		hton24(hdr->dlength, cmd->se_cmd.scsi_sense_length);
+		iov[iov_count].iov_base	= cmd->se_cmd.sense_buffer;
+		iov[iov_count++].iov_len =
+				(cmd->se_cmd.scsi_sense_length + padding);
+		tx_size += cmd->se_cmd.scsi_sense_length;
+
+		if (padding) {
+			memset(cmd->se_cmd.sense_buffer +
+				cmd->se_cmd.scsi_sense_length, 0, padding);
+			tx_size += padding;
+			pr_debug("Adding %u bytes of padding to"
+				" SENSE.\n", padding);
+		}
+
+		if (conn->conn_ops->DataDigest) {
+			iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				cmd->se_cmd.sense_buffer,
+				(cmd->se_cmd.scsi_sense_length + padding),
+				0, NULL, (u8 *)&cmd->data_crc);
+
+			iov[iov_count].iov_base    = &cmd->data_crc;
+			iov[iov_count++].iov_len     = ISCSI_CRC_LEN;
+			tx_size += ISCSI_CRC_LEN;
+
+			pr_debug("Attaching CRC32 DataDigest for"
+				" SENSE, %u bytes CRC 0x%08x\n",
+				(cmd->se_cmd.scsi_sense_length + padding),
+				cmd->data_crc);
+		}
+
+		pr_debug("Attaching SENSE DATA: %u bytes to iSCSI"
+				" Response PDU\n",
+				cmd->se_cmd.scsi_sense_length);
+	}
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		iov[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32 HeaderDigest for Response"
+				" PDU 0x%08x\n", *header_digest);
+	}
+
+	cmd->iov_misc_count = iov_count;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Built %sSCSI Response, ITT: 0x%08x, StatSN: 0x%08x,"
+		" Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n",
+		(!recovery) ? "" : "Recovery ", cmd->init_task_tag,
+		cmd->stat_sn, 0x00, cmd->se_cmd.scsi_status, conn->cid);
+
+	return 0;
+}
+
+static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr)
+{
+	switch (se_tmr->response) {
+	case TMR_FUNCTION_COMPLETE:
+		return ISCSI_TMF_RSP_COMPLETE;
+	case TMR_TASK_DOES_NOT_EXIST:
+		return ISCSI_TMF_RSP_NO_TASK;
+	case TMR_LUN_DOES_NOT_EXIST:
+		return ISCSI_TMF_RSP_NO_LUN;
+	case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED:
+		return ISCSI_TMF_RSP_NOT_SUPPORTED;
+	case TMR_FUNCTION_AUTHORIZATION_FAILED:
+		return ISCSI_TMF_RSP_AUTH_FAILED;
+	case TMR_FUNCTION_REJECTED:
+	default:
+		return ISCSI_TMF_RSP_REJECTED;
+	}
+}
+
+static int iscsit_send_task_mgt_rsp(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+	struct iscsi_tm_rsp *hdr;
+	u32 tx_size = 0;
+
+	hdr			= (struct iscsi_tm_rsp *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_SCSI_TMFUNC_RSP;
+	hdr->response		= iscsit_convert_tcm_tmr_rsp(se_tmr);
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	cmd->stat_sn		= conn->stat_sn++;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+
+	iscsit_increment_maxcmdsn(cmd, conn->sess);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	cmd->iov_misc[0].iov_base	= cmd->pdu;
+	cmd->iov_misc[0].iov_len	= ISCSI_HDR_LEN;
+	tx_size += ISCSI_HDR_LEN;
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32 HeaderDigest for Task"
+			" Mgmt Response PDU 0x%08x\n", *header_digest);
+	}
+
+	cmd->iov_misc_count = 1;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Built Task Management Response ITT: 0x%08x,"
+		" StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n",
+		cmd->init_task_tag, cmd->stat_sn, hdr->response, conn->cid);
+
+	return 0;
+}
+
+static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
+{
+	char *payload = NULL;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tiqn *tiqn;
+	struct iscsi_tpg_np *tpg_np;
+	int buffer_len, end_of_buf = 0, len = 0, payload_len = 0;
+	unsigned char buf[256];
+
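+	/*
+	 * Cap the SendTargets payload at 32K, even if the initiator's
+	 * MaxRecvDataSegmentLength would allow more.
+	 */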
+	buffer_len = (conn->conn_ops->MaxRecvDataSegmentLength > 32768) ?
+			32768 : conn->conn_ops->MaxRecvDataSegmentLength;
+
+	memset(buf, 0, 256);
+
+	payload = kzalloc(buffer_len, GFP_KERNEL);
+	if (!payload) {
+		pr_err("Unable to allocate memory for sendtargets"
+				" response.\n");
+		return -ENOMEM;
+	}
+
+	spin_lock(&tiqn_lock);
+	list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+		len = sprintf(buf, "TargetName=%s", tiqn->tiqn);
+		len += 1;
+
+		if ((len + payload_len) > buffer_len) {
+			end_of_buf = 1;
+			goto eob;
+		}
+		memcpy((void *)payload + payload_len, buf, len);
+		payload_len += len;
+
+		spin_lock(&tiqn->tiqn_tpg_lock);
+		list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+			spin_lock(&tpg->tpg_state_lock);
+			if ((tpg->tpg_state == TPG_STATE_FREE) ||
+			    (tpg->tpg_state == TPG_STATE_INACTIVE)) {
+				spin_unlock(&tpg->tpg_state_lock);
+				continue;
+			}
+			spin_unlock(&tpg->tpg_state_lock);
+
+			spin_lock(&tpg->tpg_np_lock);
+			list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
+						tpg_np_list) {
+				len = sprintf(buf, "TargetAddress="
+					"%s%s%s:%hu,%hu",
+					(tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+					"[" : "", tpg_np->tpg_np->np_ip,
+					(tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+					"]" : "", tpg_np->tpg_np->np_port,
+					tpg->tpgt);
+				len += 1;
+
+				if ((len + payload_len) > buffer_len) {
+					spin_unlock(&tpg->tpg_np_lock);
+					spin_unlock(&tiqn->tiqn_tpg_lock);
+					end_of_buf = 1;
+					goto eob;
+				}
+				memcpy((void *)payload + payload_len, buf, len);
+				payload_len += len;
+			}
+			spin_unlock(&tpg->tpg_np_lock);
+		}
+		spin_unlock(&tiqn->tiqn_tpg_lock);
+eob:
+		if (end_of_buf)
+			break;
+	}
+	spin_unlock(&tiqn_lock);
+
+	cmd->buf_ptr = payload;
+
+	return payload_len;
+}
+
+/*
+ *	FIXME: Add support for F_BIT and C_BIT when the length is longer than
+ *	MaxRecvDataSegmentLength.
+ */
+static int iscsit_send_text_rsp(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_text_rsp *hdr;
+	struct kvec *iov;
+	u32 padding = 0, tx_size = 0;
+	int text_length, iov_count = 0;
+
+	text_length = iscsit_build_sendtargets_response(cmd);
+	if (text_length < 0)
+		return text_length;
+
+	padding = ((-text_length) & 3);
+	if (padding != 0) {
+		memset(cmd->buf_ptr + text_length, 0, padding);
+		pr_debug("Attaching %u additional bytes for"
+			" padding.\n", padding);
+	}
+
+	hdr			= (struct iscsi_text_rsp *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode		= ISCSI_OP_TEXT_RSP;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	hton24(hdr->dlength, text_length);
+	hdr->itt		= cpu_to_be32(cmd->init_task_tag);
+	hdr->ttt		= cpu_to_be32(cmd->targ_xfer_tag);
+	cmd->stat_sn		= conn->stat_sn++;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+
+	iscsit_increment_maxcmdsn(cmd, conn->sess);
+	hdr->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	iov = &cmd->iov_misc[0];
+
+	iov[iov_count].iov_base = cmd->pdu;
+	iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+	iov[iov_count].iov_base	= cmd->buf_ptr;
+	iov[iov_count++].iov_len = text_length + padding;
+
+	tx_size += (ISCSI_HDR_LEN + text_length + padding);
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		iov[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32 HeaderDigest for"
+			" Text Response PDU 0x%08x\n", *header_digest);
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				cmd->buf_ptr, (text_length + padding),
+				0, NULL, (u8 *)&cmd->data_crc);
+
+		iov[iov_count].iov_base	= &cmd->data_crc;
+		iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+		tx_size	+= ISCSI_CRC_LEN;
+
+		pr_debug("Attaching DataDigest for %u bytes of text"
+			" data, CRC 0x%08x\n", (text_length + padding),
+			cmd->data_crc);
+	}
+
+	cmd->iov_misc_count = iov_count;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x,"
+		" Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn,
+			text_length, conn->cid);
+	return 0;
+}
+
+static int iscsit_send_reject(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	u32 iov_count = 0, tx_size = 0;
+	struct iscsi_reject *hdr;
+	struct kvec *iov;
+
+	hdr			= (struct iscsi_reject *) cmd->pdu;
+	hdr->opcode		= ISCSI_OP_REJECT;
+	hdr->flags		|= ISCSI_FLAG_CMD_FINAL;
+	hton24(hdr->dlength, ISCSI_HDR_LEN);
+	cmd->stat_sn		= conn->stat_sn++;
+	hdr->statsn		= cpu_to_be32(cmd->stat_sn);
+	hdr->exp_cmdsn	= cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn	= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	iov = &cmd->iov_misc[0];
+
+	iov[iov_count].iov_base = cmd->pdu;
+	iov[iov_count++].iov_len = ISCSI_HDR_LEN;
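+	/*
+	 * The header of the rejected PDU is echoed back as the data
+	 * segment, hence dlength == ISCSI_HDR_LEN above.
+	 */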
+	iov[iov_count].iov_base = cmd->buf_ptr;
+	iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+
+	tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN);
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		iov[0].iov_len += ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32 HeaderDigest for"
+			" REJECT PDU 0x%08x\n", *header_digest);
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)cmd->buf_ptr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)&cmd->data_crc);
+
+		iov[iov_count].iov_base = &cmd->data_crc;
+		iov[iov_count++].iov_len  = ISCSI_CRC_LEN;
+		tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32 DataDigest for REJECT"
+				" PDU 0x%08x\n", cmd->data_crc);
+	}
+
+	cmd->iov_misc_count = iov_count;
+	cmd->tx_size = tx_size;
+
+	pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x,"
+		" CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid);
+
+	return 0;
+}
+
+static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+	if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) ||
+	    (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) {
+		wait_for_completion_interruptible_timeout(
+					&conn->tx_half_close_comp,
+					ISCSI_TX_THREAD_TCP_TIMEOUT * HZ);
+	}
+}
+
+#ifdef CONFIG_SMP
+
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+	struct iscsi_thread_set *ts = conn->thread_set;
+	int ord, cpu;
+	/*
+	 * thread_id is assigned from iscsit_global->ts_bitmap from
+	 * within iscsi_thread_set.c:iscsi_allocate_thread_sets()
+	 *
+	 * Here we use thread_id to determine which CPU that this
+	 * iSCSI connection's iscsi_thread_set will be scheduled to
+	 * execute upon.
+	 */
+	ord = ts->thread_id % cpumask_weight(cpu_online_mask);
+#if 0
+	pr_debug(">>>>>>>>>>>>>>>>>>>> Generated ord: %d from"
+			" thread_id: %d\n", ord, ts->thread_id);
+#endif
+	for_each_online_cpu(cpu) {
+		if (ord-- == 0) {
+			cpumask_set_cpu(cpu, conn->conn_cpumask);
+			return;
+		}
+	}
+	/*
+	 * This should never be reached.
+	 */
+	dump_stack();
+	cpumask_setall(conn->conn_cpumask);
+}
+
+static inline void iscsit_thread_check_cpumask(
+	struct iscsi_conn *conn,
+	struct task_struct *p,
+	int mode)
+{
+	char buf[128];
+	/*
+	 * mode == 1 signals iscsi_target_tx_thread() usage.
+	 * mode == 0 signals iscsi_target_rx_thread() usage.
+	 */
+	if (mode == 1) {
+		if (!conn->conn_tx_reset_cpumask)
+			return;
+		conn->conn_tx_reset_cpumask = 0;
+	} else {
+		if (!conn->conn_rx_reset_cpumask)
+			return;
+		conn->conn_rx_reset_cpumask = 0;
+	}
+	/*
+	 * Update the CPU mask for this single kthread so that
+	 * both TX and RX kthreads are scheduled to run on the
+	 * same CPU.
+	 */
+	memset(buf, 0, 128);
+	cpumask_scnprintf(buf, 128, conn->conn_cpumask);
+#if 0
+	pr_debug(">>>>>>>>>>>>>> Calling set_cpus_allowed_ptr():"
+			" %s for %s\n", buf, p->comm);
+#endif
+	set_cpus_allowed_ptr(p, conn->conn_cpumask);
+}
+
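+/*
+ * On !SMP builds the cpumask helpers compile away to empty
+ * statement expressions.
+ */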
+#else
+#define iscsit_thread_get_cpumask(X) ({})
+#define iscsit_thread_check_cpumask(X, Y, Z) ({})
+#endif /* CONFIG_SMP */
+
+int iscsi_target_tx_thread(void *arg)
+{
+	u8 state;
+	int eodr = 0;
+	int ret = 0;
+	int sent_status = 0;
+	int use_misc = 0;
+	int map_sg = 0;
+	struct iscsi_cmd *cmd = NULL;
+	struct iscsi_conn *conn;
+	struct iscsi_queue_req *qr = NULL;
+	struct se_cmd *se_cmd;
+	struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg;
+	/*
+	 * Allow ourselves to be interrupted by SIGINT so that a
+	 * connection recovery / failure event can be triggered externally.
+	 */
+	allow_signal(SIGINT);
+
+restart:
+	conn = iscsi_tx_thread_pre_handler(ts);
+	if (!conn)
+		goto out;
+
+	eodr = map_sg = ret = sent_status = use_misc = 0;
+
+	while (!kthread_should_stop()) {
+		/*
+		 * Ensure that both TX and RX per connection kthreads
+		 * are scheduled to run on the same CPU.
+		 */
+		iscsit_thread_check_cpumask(conn, current, 1);
+
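+		/*
+		 * Sleep until woken; wake-up sources are assumed to be new
+		 * work queued to this connection, a thread set reset, or a
+		 * SIGINT from connection recovery / failure handling.
+		 */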
+		schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT);
+
+		if ((ts->status == ISCSI_THREAD_SET_RESET) ||
+		     signal_pending(current))
+			goto transport_err;
+
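+		/*
+		 * Drain the immediate queue (R2Ts, unsolicited NOPINs,
+		 * command removals) before servicing the response queue;
+		 * check_immediate_queue lets the response handling below
+		 * yield back here between PDUs.
+		 */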
+get_immediate:
+		qr = iscsit_get_cmd_from_immediate_queue(conn);
+		if (qr) {
+			atomic_set(&conn->check_immediate_queue, 0);
+			cmd = qr->cmd;
+			state = qr->state;
+			kmem_cache_free(lio_qr_cache, qr);
+
+			spin_lock_bh(&cmd->istate_lock);
+			switch (state) {
+			case ISTATE_SEND_R2T:
+				spin_unlock_bh(&cmd->istate_lock);
+				ret = iscsit_send_r2t(cmd, conn);
+				break;
+			case ISTATE_REMOVE:
+				spin_unlock_bh(&cmd->istate_lock);
+
+				if (cmd->data_direction == DMA_TO_DEVICE)
+					iscsit_stop_dataout_timer(cmd);
+
+				spin_lock_bh(&conn->cmd_lock);
+				list_del(&cmd->i_list);
+				spin_unlock_bh(&conn->cmd_lock);
+				/*
+				 * Determine if a struct se_cmd is associated with
+				 * this struct iscsi_cmd.
+				 */
+				if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+				    !(cmd->tmr_req))
+					iscsit_release_cmd(cmd);
+				else
+					transport_generic_free_cmd(&cmd->se_cmd,
+								1, 0);
+				goto get_immediate;
+			case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+				spin_unlock_bh(&cmd->istate_lock);
+				iscsit_mod_nopin_response_timer(conn);
+				ret = iscsit_send_unsolicited_nopin(cmd,
+						conn, 1);
+				break;
+			case ISTATE_SEND_NOPIN_NO_RESPONSE:
+				spin_unlock_bh(&cmd->istate_lock);
+				ret = iscsit_send_unsolicited_nopin(cmd,
+						conn, 0);
+				break;
+			default:
+				pr_err("Unknown Opcode: 0x%02x ITT:"
+				" 0x%08x, i_state: %d on CID: %hu\n",
+				cmd->iscsi_opcode, cmd->init_task_tag, state,
+				conn->cid);
+				spin_unlock_bh(&cmd->istate_lock);
+				goto transport_err;
+			}
+			if (ret < 0) {
+				conn->tx_immediate_queue = 0;
+				goto transport_err;
+			}
+
+			if (iscsit_send_tx_data(cmd, conn, 1) < 0) {
+				conn->tx_immediate_queue = 0;
+				iscsit_tx_thread_wait_for_tcp(conn);
+				goto transport_err;
+			}
+
+			spin_lock_bh(&cmd->istate_lock);
+			switch (state) {
+			case ISTATE_SEND_R2T:
+				spin_unlock_bh(&cmd->istate_lock);
+				spin_lock_bh(&cmd->dataout_timeout_lock);
+				iscsit_start_dataout_timer(cmd, conn);
+				spin_unlock_bh(&cmd->dataout_timeout_lock);
+				break;
+			case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+				cmd->i_state = ISTATE_SENT_NOPIN_WANT_RESPONSE;
+				spin_unlock_bh(&cmd->istate_lock);
+				break;
+			case ISTATE_SEND_NOPIN_NO_RESPONSE:
+				cmd->i_state = ISTATE_SENT_STATUS;
+				spin_unlock_bh(&cmd->istate_lock);
+				break;
+			default:
+				pr_err("Unknown Opcode: 0x%02x ITT:"
+					" 0x%08x, i_state: %d on CID: %hu\n",
+					cmd->iscsi_opcode, cmd->init_task_tag,
+					state, conn->cid);
+				spin_unlock_bh(&cmd->istate_lock);
+				goto transport_err;
+			}
+			goto get_immediate;
+		} else
+			conn->tx_immediate_queue = 0;
+
+get_response:
+		qr = iscsit_get_cmd_from_response_queue(conn);
+		if (qr) {
+			cmd = qr->cmd;
+			state = qr->state;
+			kmem_cache_free(lio_qr_cache, qr);
+
+			spin_lock_bh(&cmd->istate_lock);
+check_rsp_state:
+			switch (state) {
+			case ISTATE_SEND_DATAIN:
+				spin_unlock_bh(&cmd->istate_lock);
+				ret = iscsit_send_data_in(cmd, conn,
+							  &eodr);
+				map_sg = 1;
+				break;
+			case ISTATE_SEND_STATUS:
+			case ISTATE_SEND_STATUS_RECOVERY:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_status(cmd, conn);
+				break;
+			case ISTATE_SEND_LOGOUTRSP:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_logout_response(cmd, conn);
+				break;
+			case ISTATE_SEND_ASYNCMSG:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_conn_drop_async_message(
+						cmd, conn);
+				break;
+			case ISTATE_SEND_NOPIN:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_nopin_response(cmd, conn);
+				break;
+			case ISTATE_SEND_REJECT:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_reject(cmd, conn);
+				break;
+			case ISTATE_SEND_TASKMGTRSP:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_task_mgt_rsp(cmd, conn);
+				if (ret != 0)
+					break;
+				ret = iscsit_tmr_post_handler(cmd, conn);
+				if (ret != 0)
+					iscsit_fall_back_to_erl0(conn->sess);
+				break;
+			case ISTATE_SEND_TEXTRSP:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_text_rsp(cmd, conn);
+				break;
+			default:
+				pr_err("Unknown Opcode: 0x%02x ITT:"
+					" 0x%08x, i_state: %d on CID: %hu\n",
+					cmd->iscsi_opcode, cmd->init_task_tag,
+					state, conn->cid);
+				spin_unlock_bh(&cmd->istate_lock);
+				goto transport_err;
+			}
+			if (ret < 0) {
+				conn->tx_response_queue = 0;
+				goto transport_err;
+			}
+
+			se_cmd = &cmd->se_cmd;
+
+			if (map_sg && !conn->conn_ops->IFMarker) {
+				if (iscsit_fe_sendpage_sg(cmd, conn) < 0) {
+					conn->tx_response_queue = 0;
+					iscsit_tx_thread_wait_for_tcp(conn);
+					iscsit_unmap_iovec(cmd);
+					goto transport_err;
+				}
+			} else {
+				if (iscsit_send_tx_data(cmd, conn, use_misc) < 0) {
+					conn->tx_response_queue = 0;
+					iscsit_tx_thread_wait_for_tcp(conn);
+					iscsit_unmap_iovec(cmd);
+					goto transport_err;
+				}
+			}
+			map_sg = 0;
+			iscsit_unmap_iovec(cmd);
+
+			spin_lock_bh(&cmd->istate_lock);
+			switch (state) {
+			case ISTATE_SEND_DATAIN:
+				if (!eodr)
+					goto check_rsp_state;
+
+				if (eodr == 1) {
+					cmd->i_state = ISTATE_SENT_LAST_DATAIN;
+					sent_status = 1;
+					eodr = use_misc = 0;
+				} else if (eodr == 2) {
+					cmd->i_state = state =
+							ISTATE_SEND_STATUS;
+					sent_status = 0;
+					eodr = use_misc = 0;
+					goto check_rsp_state;
+				}
+				break;
+			case ISTATE_SEND_STATUS:
+				use_misc = 0;
+				sent_status = 1;
+				break;
+			case ISTATE_SEND_ASYNCMSG:
+			case ISTATE_SEND_NOPIN:
+			case ISTATE_SEND_STATUS_RECOVERY:
+			case ISTATE_SEND_TEXTRSP:
+				use_misc = 0;
+				sent_status = 1;
+				break;
+			case ISTATE_SEND_REJECT:
+				use_misc = 0;
+				if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) {
+					cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN;
+					spin_unlock_bh(&cmd->istate_lock);
+					complete(&cmd->reject_comp);
+					goto transport_err;
+				}
+				complete(&cmd->reject_comp);
+				break;
+			case ISTATE_SEND_TASKMGTRSP:
+				use_misc = 0;
+				sent_status = 1;
+				break;
+			case ISTATE_SEND_LOGOUTRSP:
+				spin_unlock_bh(&cmd->istate_lock);
+				if (!iscsit_logout_post_handler(cmd, conn))
+					goto restart;
+				spin_lock_bh(&cmd->istate_lock);
+				use_misc = 0;
+				sent_status = 1;
+				break;
+			default:
+				pr_err("Unknown Opcode: 0x%02x ITT:"
+					" 0x%08x, i_state: %d on CID: %hu\n",
+					cmd->iscsi_opcode, cmd->init_task_tag,
+					cmd->i_state, conn->cid);
+				spin_unlock_bh(&cmd->istate_lock);
+				goto transport_err;
+			}
+
+			if (sent_status) {
+				cmd->i_state = ISTATE_SENT_STATUS;
+				sent_status = 0;
+			}
+			spin_unlock_bh(&cmd->istate_lock);
+
+			if (atomic_read(&conn->check_immediate_queue))
+				goto get_immediate;
+
+			goto get_response;
+		} else
+			conn->tx_response_queue = 0;
+	}
+
+transport_err:
+	iscsit_take_action_for_connection_exit(conn);
+	goto restart;
+out:
+	return 0;
+}
+
+int iscsi_target_rx_thread(void *arg)
+{
+	int ret;
+	u8 buffer[ISCSI_HDR_LEN], opcode;
+	u32 checksum = 0, digest = 0;
+	struct iscsi_conn *conn = NULL;
+	struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg;
+	struct kvec iov;
+	/*
+	 * Allow ourselves to be interrupted by SIGINT so that a
+	 * connection recovery / failure event can be triggered externally.
+	 */
+	allow_signal(SIGINT);
+
+restart:
+	conn = iscsi_rx_thread_pre_handler(ts);
+	if (!conn)
+		goto out;
+
+	while (!kthread_should_stop()) {
+		/*
+		 * Ensure that both TX and RX per connection kthreads
+		 * are scheduled to run on the same CPU.
+		 */
+		iscsit_thread_check_cpumask(conn, current, 0);
+
+		memset(buffer, 0, ISCSI_HDR_LEN);
+		memset(&iov, 0, sizeof(struct kvec));
+
+		iov.iov_base	= buffer;
+		iov.iov_len	= ISCSI_HDR_LEN;
+
+		ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+		if (ret != ISCSI_HDR_LEN) {
+			iscsit_rx_thread_wait_for_tcp(conn);
+			goto transport_err;
+		}
+
+		/*
+		 * Set conn->bad_hdr for use with REJECT PDUs.
+		 */
+		memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN);
+
+		if (conn->conn_ops->HeaderDigest) {
+			iov.iov_base	= &digest;
+			iov.iov_len	= ISCSI_CRC_LEN;
+
+			ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+			if (ret != ISCSI_CRC_LEN) {
+				iscsit_rx_thread_wait_for_tcp(conn);
+				goto transport_err;
+			}
+
+			iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+					buffer, ISCSI_HDR_LEN,
+					0, NULL, (u8 *)&checksum);
+
+			if (digest != checksum) {
+				pr_err("HeaderDigest CRC32C failed,"
+					" received 0x%08x, computed 0x%08x\n",
+					digest, checksum);
+				/*
+				 * Set the PDU to 0xff so it will intentionally
+				 * hit default in the switch below.
+				 */
+				memset(buffer, 0xff, ISCSI_HDR_LEN);
+				spin_lock_bh(&conn->sess->session_stats_lock);
+				conn->sess->conn_digest_errors++;
+				spin_unlock_bh(&conn->sess->session_stats_lock);
+			} else {
+				pr_debug("Got HeaderDigest CRC32C"
+						" 0x%08x\n", checksum);
+			}
+		}
+
+		if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT)
+			goto transport_err;
+
+		opcode = buffer[0] & ISCSI_OPCODE_MASK;
+
+		/*
+		 * Discovery sessions may only carry Text and Logout PDUs.
+		 */
+		if (conn->sess->sess_ops->SessionType &&
+		    (opcode != ISCSI_OP_TEXT) && (opcode != ISCSI_OP_LOGOUT)) {
+			pr_err("Received illegal iSCSI Opcode: 0x%02x"
+			" while in Discovery Session, rejecting.\n", opcode);
+			iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buffer, conn);
+			goto transport_err;
+		}
+
+		switch (opcode) {
+		case ISCSI_OP_SCSI_CMD:
+			if (iscsit_handle_scsi_cmd(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_SCSI_DATA_OUT:
+			if (iscsit_handle_data_out(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_NOOP_OUT:
+			if (iscsit_handle_nop_out(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_SCSI_TMFUNC:
+			if (iscsit_handle_task_mgt_cmd(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_TEXT:
+			if (iscsit_handle_text_cmd(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_LOGOUT:
+			ret = iscsit_handle_logout_cmd(conn, buffer);
+			if (ret > 0) {
+				wait_for_completion_timeout(&conn->conn_logout_comp,
+						SECONDS_FOR_LOGOUT_COMP * HZ);
+				goto transport_err;
+			} else if (ret < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_SNACK:
+			if (iscsit_handle_snack(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		default:
+			pr_err("Got unknown iSCSI OpCode: 0x%02x\n",
+					opcode);
+			if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+				pr_err("Cannot recover from unknown"
+				" opcode while ERL=0, closing iSCSI connection"
+				".\n");
+				goto transport_err;
+			}
+			if (!conn->conn_ops->OFMarker) {
+				pr_err("Unable to recover from unknown"
+				" opcode while OFMarker=No, closing iSCSI"
+					" connection.\n");
+				goto transport_err;
+			}
+			if (iscsit_recover_from_unknown_opcode(conn) < 0) {
+				pr_err("Unable to recover from unknown"
+					" opcode, closing iSCSI connection.\n");
+				goto transport_err;
+			}
+			break;
+		}
+	}
+
+transport_err:
+	if (!signal_pending(current))
+		atomic_set(&conn->transport_failed, 1);
+	iscsit_take_action_for_connection_exit(conn);
+	goto restart;
+out:
+	return 0;
+}
+
+static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+{
+	struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL;
+	struct iscsi_session *sess = conn->sess;
+	struct se_cmd *se_cmd;
+	/*
+	 * We expect this function to only ever be called from either RX or TX
+	 * thread context via iscsit_close_connection() once the other context
+	 * has been reset and has returned to its sleeping pre-handler state.
+	 */
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+		if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)) {
+
+			list_del(&cmd->i_list);
+			spin_unlock_bh(&conn->cmd_lock);
+			iscsit_increment_maxcmdsn(cmd, sess);
+			se_cmd = &cmd->se_cmd;
+			/*
+			 * Special cases for active iSCSI TMR, and
+			 * transport_lookup_cmd_lun() failing from
+			 * iscsit_get_lun_for_cmd() in iscsit_handle_scsi_cmd().
+			 */
+			if (cmd->tmr_req && se_cmd->transport_wait_for_tasks)
+				se_cmd->transport_wait_for_tasks(se_cmd, 1, 1);
+			else if (cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)
+				transport_release_cmd(se_cmd);
+			else
+				iscsit_release_cmd(cmd);
+
+			spin_lock_bh(&conn->cmd_lock);
+			continue;
+		}
+		list_del(&cmd->i_list);
+		spin_unlock_bh(&conn->cmd_lock);
+
+		iscsit_increment_maxcmdsn(cmd, sess);
+		se_cmd = &cmd->se_cmd;
+
+		if (se_cmd->transport_wait_for_tasks)
+			se_cmd->transport_wait_for_tasks(se_cmd, 1, 1);
+
+		spin_lock_bh(&conn->cmd_lock);
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+}
+
+static void iscsit_stop_timers_for_cmds(
+	struct iscsi_conn *conn)
+{
+	struct iscsi_cmd *cmd;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+		if (cmd->data_direction == DMA_TO_DEVICE)
+			iscsit_stop_dataout_timer(cmd);
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+}
+
+int iscsit_close_connection(
+	struct iscsi_conn *conn)
+{
+	int conn_logout = (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT);
+	struct iscsi_session	*sess = conn->sess;
+
+	pr_debug("Closing iSCSI connection CID %hu on SID:"
+		" %u\n", conn->cid, sess->sid);
+	/*
+	 * Always complete conn_logout_comp just in case the RX Thread is sleeping
+	 * and the logout response never got sent because the connection
+	 * failed.
+	 */
+	complete(&conn->conn_logout_comp);
+
+	iscsi_release_thread_set(conn);
+
+	iscsit_stop_timers_for_cmds(conn);
+	iscsit_stop_nopin_response_timer(conn);
+	iscsit_stop_nopin_timer(conn);
+	iscsit_free_queue_reqs_for_conn(conn);
+
+	/*
+	 * During Connection recovery drop unacknowledged out of order
+	 * commands for this connection, and prepare the other commands
+	 * for reallegiance.
+	 *
+	 * During normal operation clear the out of order commands (but
+	 * do not free the struct iscsi_ooo_cmdsn's) and release all
+	 * struct iscsi_cmds.
+	 */
+	if (atomic_read(&conn->connection_recovery)) {
+		iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn);
+		iscsit_prepare_cmds_for_realligance(conn);
+	} else {
+		iscsit_clear_ooo_cmdsns_for_conn(conn);
+		iscsit_release_commands_from_conn(conn);
+	}
+
+	/*
+	 * Handle decrementing session or connection usage count if
+	 * a logout response was not able to be sent because the
+	 * connection failed.  Fall back to Session Recovery here.
+	 */
+	if (atomic_read(&conn->conn_logout_remove)) {
+		if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) {
+			iscsit_dec_conn_usage_count(conn);
+			iscsit_dec_session_usage_count(sess);
+		}
+		if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION)
+			iscsit_dec_conn_usage_count(conn);
+
+		atomic_set(&conn->conn_logout_remove, 0);
+		atomic_set(&sess->session_reinstatement, 0);
+		atomic_set(&sess->session_fall_back_to_erl0, 1);
+	}
+
+	spin_lock_bh(&sess->conn_lock);
+	list_del(&conn->conn_list);
+
+	/*
+	 * Attempt to let the Initiator know this connection failed by
+	 * sending a Connection Dropped Async Message on another
+	 * active connection.
+	 */
+	if (atomic_read(&conn->connection_recovery))
+		iscsit_build_conn_drop_async_message(conn);
+
+	spin_unlock_bh(&sess->conn_lock);
+
+	/*
+	 * If connection reinstatement is being performed on this connection,
+	 * complete the connection reinstatement completion that is being
+	 * waited on in iscsit_cause_connection_reinstatement().
+	 */
+	spin_lock_bh(&conn->state_lock);
+	if (atomic_read(&conn->sleep_on_conn_wait_comp)) {
+		spin_unlock_bh(&conn->state_lock);
+		complete(&conn->conn_wait_comp);
+		wait_for_completion(&conn->conn_post_wait_comp);
+		spin_lock_bh(&conn->state_lock);
+	}
+
+	/*
+	 * If connection reinstatement is being performed on this connection
+	 * by receiving a REMOVECONNFORRECOVERY logout request, complete the
+	 * connection wait rcfr completion that is being waited on in
+	 * iscsit_connection_reinstatement_rcfr().
+	 */
+	if (atomic_read(&conn->connection_wait_rcfr)) {
+		spin_unlock_bh(&conn->state_lock);
+		complete(&conn->conn_wait_rcfr_comp);
+		wait_for_completion(&conn->conn_post_wait_comp);
+		spin_lock_bh(&conn->state_lock);
+	}
+	atomic_set(&conn->connection_reinstatement, 1);
+	spin_unlock_bh(&conn->state_lock);
+
+	/*
+	 * If any other processes are accessing this connection pointer we
+	 * must wait until they have completed.
+	 */
+	iscsit_check_conn_usage_count(conn);
+
+	if (conn->conn_rx_hash.tfm)
+		crypto_free_hash(conn->conn_rx_hash.tfm);
+	if (conn->conn_tx_hash.tfm)
+		crypto_free_hash(conn->conn_tx_hash.tfm);
+
+	if (conn->conn_cpumask)
+		free_cpumask_var(conn->conn_cpumask);
+
+	kfree(conn->conn_ops);
+	conn->conn_ops = NULL;
+
+	if (conn->sock) {
+		if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) {
+			kfree(conn->sock->file);
+			conn->sock->file = NULL;
+		}
+		sock_release(conn->sock);
+	}
+	conn->thread_set = NULL;
+
+	pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
+	conn->conn_state = TARG_CONN_STATE_FREE;
+	kfree(conn);
+
+	spin_lock_bh(&sess->conn_lock);
+	atomic_dec(&sess->nconn);
+	pr_debug("Decremented iSCSI connection count to %d from node:"
+		" %s\n", atomic_read(&sess->nconn),
+		sess->sess_ops->InitiatorName);
+	/*
+	 * Make sure that if one connection fails in a non-ERL=2 iSCSI
+	 * Session that they all fail.
+	 */
+	if ((sess->sess_ops->ErrorRecoveryLevel != 2) && !conn_logout &&
+	     !atomic_read(&sess->session_logout))
+		atomic_set(&sess->session_fall_back_to_erl0, 1);
+
+	/*
+	 * If this was not the last connection in the session, and we are
+	 * performing session reinstatement or falling back to ERL=0, call
+	 * iscsit_stop_session() without sleeping to shutdown the other
+	 * active connections.
+	 */
+	if (atomic_read(&sess->nconn)) {
+		if (!atomic_read(&sess->session_reinstatement) &&
+		    !atomic_read(&sess->session_fall_back_to_erl0)) {
+			spin_unlock_bh(&sess->conn_lock);
+			return 0;
+		}
+		if (!atomic_read(&sess->session_stop_active)) {
+			atomic_set(&sess->session_stop_active, 1);
+			spin_unlock_bh(&sess->conn_lock);
+			iscsit_stop_session(sess, 0, 0);
+			return 0;
+		}
+		spin_unlock_bh(&sess->conn_lock);
+		return 0;
+	}
+
+	/*
+	 * If this was the last connection in the session and one of the
+	 * following is occurring:
+	 *
+	 * Session Reinstatement is not being performed and the session is
+	 * falling back to ERL=0: call iscsit_close_session().
+	 *
+	 * Session Logout was requested: iscsit_close_session() will be
+	 * called elsewhere.
+	 *
+	 * Session Continuation is not being performed: start the Time2Retain
+	 * handler and check if sleep_on_sess_wait_comp is active.
+	 */
+	if (!atomic_read(&sess->session_reinstatement) &&
+	     atomic_read(&sess->session_fall_back_to_erl0)) {
+		spin_unlock_bh(&sess->conn_lock);
+		iscsit_close_session(sess);
+
+		return 0;
+	} else if (atomic_read(&sess->session_logout)) {
+		pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
+		sess->session_state = TARG_SESS_STATE_FREE;
+		spin_unlock_bh(&sess->conn_lock);
+
+		if (atomic_read(&sess->sleep_on_sess_wait_comp))
+			complete(&sess->session_wait_comp);
+
+		return 0;
+	} else {
+		pr_debug("Moving to TARG_SESS_STATE_FAILED.\n");
+		sess->session_state = TARG_SESS_STATE_FAILED;
+
+		if (!atomic_read(&sess->session_continuation)) {
+			spin_unlock_bh(&sess->conn_lock);
+			iscsit_start_time2retain_handler(sess);
+		} else
+			spin_unlock_bh(&sess->conn_lock);
+
+		if (atomic_read(&sess->sleep_on_sess_wait_comp))
+			complete(&sess->session_wait_comp);
+
+		return 0;
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	return 0;
+}
+
+int iscsit_close_session(struct iscsi_session *sess)
+{
+	struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+	if (atomic_read(&sess->nconn)) {
+		pr_err("%d connection(s) still exist for iSCSI session"
+			" to %s\n", atomic_read(&sess->nconn),
+			sess->sess_ops->InitiatorName);
+		BUG();
+	}
+
+	spin_lock_bh(&se_tpg->session_lock);
+	atomic_set(&sess->session_logout, 1);
+	atomic_set(&sess->session_reinstatement, 1);
+	iscsit_stop_time2retain_timer(sess);
+	spin_unlock_bh(&se_tpg->session_lock);
+
+	/*
+	 * transport_deregister_session_configfs() will clear the
+	 * struct se_node_acl->nacl_sess pointer now as an iscsi_np process context
+	 * can be setting it again with __transport_register_session() in
+	 * iscsi_post_login_handler() again after the iscsit_stop_session()
+	 * completes in iscsi_np context.
+	 */
+	transport_deregister_session_configfs(sess->se_sess);
+
+	/*
+	 * If any other processes are accessing this session pointer we must
+	 * wait until they have completed.  If we are in interrupt context
+	 * (the time2retain handler) and hold an active session usage count,
+	 * we restart the timer and exit.
+	 */
+	if (!in_interrupt()) {
+		if (iscsit_check_session_usage_count(sess) == 1)
+			iscsit_stop_session(sess, 1, 1);
+	} else {
+		if (iscsit_check_session_usage_count(sess) == 2) {
+			atomic_set(&sess->session_logout, 0);
+			iscsit_start_time2retain_handler(sess);
+			return 0;
+		}
+	}
+
+	transport_deregister_session(sess->se_sess);
+
+	if (sess->sess_ops->ErrorRecoveryLevel == 2)
+		iscsit_free_connection_recovery_entires(sess);
+
+	iscsit_free_all_ooo_cmdsns(sess);
+
+	spin_lock_bh(&se_tpg->session_lock);
+	pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
+	sess->session_state = TARG_SESS_STATE_FREE;
+	pr_debug("Released iSCSI session from node: %s\n",
+			sess->sess_ops->InitiatorName);
+	tpg->nsessions--;
+	if (tpg->tpg_tiqn)
+		tpg->tpg_tiqn->tiqn_nsessions--;
+
+	pr_debug("Decremented number of active iSCSI Sessions on"
+		" iSCSI TPG: %hu to %u\n", tpg->tpgt, tpg->nsessions);
+
+	spin_lock(&sess_idr_lock);
+	idr_remove(&sess_idr, sess->session_index);
+	spin_unlock(&sess_idr_lock);
+
+	kfree(sess->sess_ops);
+	sess->sess_ops = NULL;
+	spin_unlock_bh(&se_tpg->session_lock);
+
+	kfree(sess);
+	return 0;
+}
+
+static void iscsit_logout_post_handler_closesession(
+	struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+
+	iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
+	iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+
+	atomic_set(&conn->conn_logout_remove, 0);
+	complete(&conn->conn_logout_comp);
+
+	iscsit_dec_conn_usage_count(conn);
+	iscsit_stop_session(sess, 1, 1);
+	iscsit_dec_session_usage_count(sess);
+	iscsit_close_session(sess);
+}
+
+static void iscsit_logout_post_handler_samecid(
+	struct iscsi_conn *conn)
+{
+	iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
+	iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+
+	atomic_set(&conn->conn_logout_remove, 0);
+	complete(&conn->conn_logout_comp);
+
+	iscsit_cause_connection_reinstatement(conn, 1);
+	iscsit_dec_conn_usage_count(conn);
+}
+
+static void iscsit_logout_post_handler_diffcid(
+	struct iscsi_conn *conn,
+	u16 cid)
+{
+	struct iscsi_conn *l_conn = NULL, *conn_p;
+	struct iscsi_session *sess = conn->sess;
+
+	if (!sess)
+		return;
+
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) {
+		if (conn_p->cid == cid) {
+			iscsit_inc_conn_usage_count(conn_p);
+			l_conn = conn_p;
+			break;
+		}
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	if (!l_conn)
+		return;
+
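+	/*
+	 * Shut down the receive side of the logging-out connection's
+	 * socket so its RX thread sees EOF before reinstatement below.
+	 */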
+	if (l_conn->sock)
+		l_conn->sock->ops->shutdown(l_conn->sock, RCV_SHUTDOWN);
+
+	spin_lock_bh(&l_conn->state_lock);
+	pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+	l_conn->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+	spin_unlock_bh(&l_conn->state_lock);
+
+	iscsit_cause_connection_reinstatement(l_conn, 1);
+	iscsit_dec_conn_usage_count(l_conn);
+}
+
+/*
+ *	Return of 0 causes the TX thread to restart.
+ */
+static int iscsit_logout_post_handler(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	int ret = 0;
+
+	switch (cmd->logout_reason) {
+	case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+		switch (cmd->logout_response) {
+		case ISCSI_LOGOUT_SUCCESS:
+		case ISCSI_LOGOUT_CLEANUP_FAILED:
+		default:
+			iscsit_logout_post_handler_closesession(conn);
+			break;
+		}
+		ret = 0;
+		break;
+	case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+		if (conn->cid == cmd->logout_cid) {
+			switch (cmd->logout_response) {
+			case ISCSI_LOGOUT_SUCCESS:
+			case ISCSI_LOGOUT_CLEANUP_FAILED:
+			default:
+				iscsit_logout_post_handler_samecid(conn);
+				break;
+			}
+			ret = 0;
+		} else {
+			switch (cmd->logout_response) {
+			case ISCSI_LOGOUT_SUCCESS:
+				iscsit_logout_post_handler_diffcid(conn,
+					cmd->logout_cid);
+				break;
+			case ISCSI_LOGOUT_CID_NOT_FOUND:
+			case ISCSI_LOGOUT_CLEANUP_FAILED:
+			default:
+				break;
+			}
+			ret = 1;
+		}
+		break;
+	case ISCSI_LOGOUT_REASON_RECOVERY:
+		switch (cmd->logout_response) {
+		case ISCSI_LOGOUT_SUCCESS:
+		case ISCSI_LOGOUT_CID_NOT_FOUND:
+		case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED:
+		case ISCSI_LOGOUT_CLEANUP_FAILED:
+		default:
+			break;
+		}
+		ret = 1;
+		break;
+	default:
+		break;
+
+	}
+	return ret;
+}
+
+void iscsit_fail_session(struct iscsi_session *sess)
+{
+	struct iscsi_conn *conn;
+
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+		pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
+		conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	pr_debug("Moving to TARG_SESS_STATE_FAILED.\n");
+	sess->session_state = TARG_SESS_STATE_FAILED;
+}
+
+int iscsit_free_session(struct iscsi_session *sess)
+{
+	u16 conn_count = atomic_read(&sess->nconn);
+	struct iscsi_conn *conn, *conn_tmp = NULL;
+	int is_last;
+
+	spin_lock_bh(&sess->conn_lock);
+	atomic_set(&sess->sleep_on_sess_wait_comp, 1);
+
+	list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
+			conn_list) {
+		if (conn_count == 0)
+			break;
+
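+		/*
+		 * Pin this connection, and (unless it is the tail entry)
+		 * the next one as well, so neither can be freed while
+		 * sess->conn_lock is dropped for the reinstatement call.
+		 */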
+		if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
+			is_last = 1;
+		} else {
+			iscsit_inc_conn_usage_count(conn_tmp);
+			is_last = 0;
+		}
+		iscsit_inc_conn_usage_count(conn);
+
+		spin_unlock_bh(&sess->conn_lock);
+		iscsit_cause_connection_reinstatement(conn, 1);
+		spin_lock_bh(&sess->conn_lock);
+
+		iscsit_dec_conn_usage_count(conn);
+		if (is_last == 0)
+			iscsit_dec_conn_usage_count(conn_tmp);
+
+		conn_count--;
+	}
+
+	if (atomic_read(&sess->nconn)) {
+		spin_unlock_bh(&sess->conn_lock);
+		wait_for_completion(&sess->session_wait_comp);
+	} else
+		spin_unlock_bh(&sess->conn_lock);
+
+	iscsit_close_session(sess);
+	return 0;
+}
+
+void iscsit_stop_session(
+	struct iscsi_session *sess,
+	int session_sleep,
+	int connection_sleep)
+{
+	u16 conn_count = atomic_read(&sess->nconn);
+	struct iscsi_conn *conn, *conn_tmp = NULL;
+	int is_last;
+
+	spin_lock_bh(&sess->conn_lock);
+	if (session_sleep)
+		atomic_set(&sess->sleep_on_sess_wait_comp, 1);
+
+	if (connection_sleep) {
+		list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
+				conn_list) {
+			if (conn_count == 0)
+				break;
+
+			if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
+				is_last = 1;
+			} else {
+				iscsit_inc_conn_usage_count(conn_tmp);
+				is_last = 0;
+			}
+			iscsit_inc_conn_usage_count(conn);
+
+			spin_unlock_bh(&sess->conn_lock);
+			iscsit_cause_connection_reinstatement(conn, 1);
+			spin_lock_bh(&sess->conn_lock);
+
+			iscsit_dec_conn_usage_count(conn);
+			if (is_last == 0)
+				iscsit_dec_conn_usage_count(conn_tmp);
+			conn_count--;
+		}
+	} else {
+		list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
+			iscsit_cause_connection_reinstatement(conn, 0);
+	}
+
+	if (session_sleep && atomic_read(&sess->nconn)) {
+		spin_unlock_bh(&sess->conn_lock);
+		wait_for_completion(&sess->session_wait_comp);
+	} else
+		spin_unlock_bh(&sess->conn_lock);
+}
+
+int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force)
+{
+	struct iscsi_session *sess;
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+	struct se_session *se_sess, *se_sess_tmp;
+	int session_count = 0;
+
+	spin_lock_bh(&se_tpg->session_lock);
+	if (tpg->nsessions && !force) {
+		spin_unlock_bh(&se_tpg->session_lock);
+		return -1;
+	}
+
+	list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+			sess_list) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+
+		spin_lock(&sess->conn_lock);
+		if (atomic_read(&sess->session_fall_back_to_erl0) ||
+		    atomic_read(&sess->session_logout) ||
+		    (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+			spin_unlock(&sess->conn_lock);
+			continue;
+		}
+		atomic_set(&sess->session_reinstatement, 1);
+		spin_unlock(&sess->conn_lock);
+		spin_unlock_bh(&se_tpg->session_lock);
+
+		iscsit_free_session(sess);
+		spin_lock_bh(&se_tpg->session_lock);
+
+		session_count++;
+	}
+	spin_unlock_bh(&se_tpg->session_lock);
+
+	pr_debug("Released %d iSCSI Session(s) from Target Portal"
+			" Group: %hu\n", session_count, tpg->tpgt);
+	return 0;
+}
+
+MODULE_DESCRIPTION("iSCSI-Target Driver for mainline target infrastructure");
+MODULE_VERSION("4.1.x");
+MODULE_AUTHOR("nab@Linux-iSCSI.org");
+MODULE_LICENSE("GPL");
+
+module_init(iscsi_target_init_module);
+module_exit(iscsi_target_cleanup_module);
diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h
new file mode 100644
index 0000000..5db2dde
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target.h
@@ -0,0 +1,42 @@
+#ifndef ISCSI_TARGET_H
+#define ISCSI_TARGET_H
+
+extern struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *);
+extern struct iscsi_tiqn *iscsit_get_tiqn(unsigned char *, int);
+extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *);
+extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *);
+extern void iscsit_del_tiqn(struct iscsi_tiqn *);
+extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *);
+extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *);
+extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *,
+				char *, int);
+extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *,
+				struct iscsi_portal_group *);
+extern int iscsit_del_np(struct iscsi_np *);
+extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *);
+extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_send_async_msg(struct iscsi_conn *, u16, u8, u8);
+extern int iscsit_send_r2t(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_build_r2ts_for_cmd(struct iscsi_cmd *, struct iscsi_conn *, int);
+extern void iscsit_thread_get_cpumask(struct iscsi_conn *);
+extern int iscsi_target_tx_thread(void *);
+extern int iscsi_target_rx_thread(void *);
+extern int iscsit_close_connection(struct iscsi_conn *);
+extern int iscsit_close_session(struct iscsi_session *);
+extern void iscsit_fail_session(struct iscsi_session *);
+extern int iscsit_free_session(struct iscsi_session *);
+extern void iscsit_stop_session(struct iscsi_session *, int, int);
+extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int);
+
+extern struct iscsit_global *iscsit_global;
+extern struct target_fabric_configfs *lio_target_fabric_configfs;
+
+extern struct kmem_cache *lio_dr_cache;
+extern struct kmem_cache *lio_ooo_cache;
+extern struct kmem_cache *lio_cmd_cache;
+extern struct kmem_cache *lio_qr_cache;
+extern struct kmem_cache *lio_r2t_cache;
+
+#endif   /*** ISCSI_TARGET_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
new file mode 100644
index 0000000..11fd743
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -0,0 +1,490 @@
+/*******************************************************************************
+ * This file houses the main functions for the iSCSI CHAP support
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_auth.h"
+
+static unsigned char chap_asciihex_to_binaryhex(unsigned char val[2])
+{
+	unsigned char result = 0;
+	/*
+	 * MSB
+	 */
+	if ((val[0] >= 'a') && (val[0] <= 'f'))
+		result = ((val[0] - 'a' + 10) & 0xf) << 4;
+	else
+		if ((val[0] >= 'A') && (val[0] <= 'F'))
+			result = ((val[0] - 'A' + 10) & 0xf) << 4;
+		else /* digit */
+			result = ((val[0] - '0') & 0xf) << 4;
+	/*
+	 * LSB
+	 */
+	if ((val[1] >= 'a') && (val[1] <= 'f'))
+		result |= ((val[1] - 'a' + 10) & 0xf);
+	else
+		if ((val[1] >= 'A') && (val[1] <= 'F'))
+			result |= ((val[1] - 'A' + 10) & 0xf);
+		else /* digit */
+			result |= ((val[1] - '0') & 0xf);
+
+	return result;
+}
+
+static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len)
+{
+	int i, j = 0;
+
+	for (i = 0; i < len; i += 2)
+		dst[j++] = (unsigned char) chap_asciihex_to_binaryhex(&src[i]);
+	/*
+	 * No NUL termination: dst may be an exactly-sized binary digest
+	 * buffer such as client_digest[MD5_SIGNATURE_SIZE].
+	 */
+	return j;
+}
+
+static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len)
+{
+	int i;
+
+	for (i = 0; i < src_len; i++) {
+		sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff);
+	}
+}
+
+static void chap_set_random(char *data, int length)
+{
+	long r;
+	unsigned n;
+
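+	/*
+	 * Build each challenge byte from three separate random draws,
+	 * folding each long with xor shifts and keeping 3 + 3 + 2 bits.
+	 */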
+	while (length > 0) {
+		get_random_bytes(&r, sizeof(long));
+		r = r ^ (r >> 8);
+		r = r ^ (r >> 4);
+		n = r & 0x7;
+
+		get_random_bytes(&r, sizeof(long));
+		r = r ^ (r >> 8);
+		r = r ^ (r >> 5);
+		n = (n << 3) | (r & 0x7);
+
+		get_random_bytes(&r, sizeof(long));
+		r = r ^ (r >> 8);
+		r = r ^ (r >> 5);
+		n = (n << 2) | (r & 0x3);
+
+		*data++ = n;
+		length--;
+	}
+}
+
+static void chap_gen_challenge(
+	struct iscsi_conn *conn,
+	int caller,
+	char *c_str,
+	unsigned int *c_len)
+{
+	unsigned char challenge_asciihex[CHAP_CHALLENGE_LENGTH * 2 + 1];
+	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+	memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1);
+
+	chap_set_random(chap->challenge, CHAP_CHALLENGE_LENGTH);
+	chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge,
+				CHAP_CHALLENGE_LENGTH);
+	/*
+	 * Set CHAP_C, and copy the generated challenge into c_str.
+	 */
+	*c_len += sprintf(c_str + *c_len, "CHAP_C=0x%s", challenge_asciihex);
+	*c_len += 1;
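+	/*
+	 * The extra byte above accounts for the NUL separator between
+	 * key=value pairs in the iSCSI login payload.
+	 */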
+
+	pr_debug("[%s] Sending CHAP_C=0x%s\n\n", (caller) ? "server" : "client",
+			challenge_asciihex);
+}
+
+
+static struct iscsi_chap *chap_server_open(
+	struct iscsi_conn *conn,
+	struct iscsi_node_auth *auth,
+	const char *a_str,
+	char *aic_str,
+	unsigned int *aic_len)
+{
+	struct iscsi_chap *chap;
+
+	if (!(auth->naf_flags & NAF_USERID_SET) ||
+	    !(auth->naf_flags & NAF_PASSWORD_SET)) {
+		pr_err("CHAP user or password not set for"
+				" Initiator ACL\n");
+		return NULL;
+	}
+
+	conn->auth_protocol = kzalloc(sizeof(struct iscsi_chap), GFP_KERNEL);
+	if (!conn->auth_protocol)
+		return NULL;
+
+	chap = (struct iscsi_chap *) conn->auth_protocol;
+	/*
+	 * We only support the MD5 message digest algorithm presently.
+	 */
+	if (strncmp(a_str, "CHAP_A=5", 8)) {
+		pr_err("CHAP_A is not MD5.\n");
+		return NULL;
+	}
+	pr_debug("[server] Got CHAP_A=5\n");
+	/*
+	 * Send back CHAP_A set to MD5.
+	 */
+	*aic_len = sprintf(aic_str, "CHAP_A=5");
+	*aic_len += 1;
+	chap->digest_type = CHAP_DIGEST_MD5;
+	pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+	/*
+	 * Set Identifier.
+	 */
+	chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++;
+	*aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id);
+	*aic_len += 1;
+	pr_debug("[server] Sending CHAP_I=%d\n", chap->id);
+	/*
+	 * Generate Challenge.
+	 */
+	chap_gen_challenge(conn, 1, aic_str, aic_len);
+
+	return chap;
+}
+
+static void chap_close(struct iscsi_conn *conn)
+{
+	kfree(conn->auth_protocol);
+	conn->auth_protocol = NULL;
+}
+
+static int chap_server_compute_md5(
+	struct iscsi_conn *conn,
+	struct iscsi_node_auth *auth,
+	char *nr_in_ptr,
+	char *nr_out_ptr,
+	unsigned int *nr_out_len)
+{
+	char *endptr;
+	unsigned char id, digest[MD5_SIGNATURE_SIZE];
+	unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2];
+	unsigned char identifier[10], *challenge = NULL;
+	unsigned char *challenge_binhex = NULL;
+	unsigned char client_digest[MD5_SIGNATURE_SIZE];
+	unsigned char server_digest[MD5_SIGNATURE_SIZE];
+	unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH];
+	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
+	struct scatterlist sg;
+	int auth_ret = -1, ret, challenge_len;
+
+	memset(identifier, 0, 10);
+	memset(chap_n, 0, MAX_CHAP_N_SIZE);
+	memset(chap_r, 0, MAX_RESPONSE_LENGTH);
+	memset(digest, 0, MD5_SIGNATURE_SIZE);
+	memset(response, 0, MD5_SIGNATURE_SIZE * 2 + 2);
+	memset(client_digest, 0, MD5_SIGNATURE_SIZE);
+	memset(server_digest, 0, MD5_SIGNATURE_SIZE);
+
+	challenge = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL);
+	if (!challenge) {
+		pr_err("Unable to allocate challenge buffer\n");
+		goto out;
+	}
+
+	challenge_binhex = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL);
+	if (!challenge_binhex) {
+		pr_err("Unable to allocate challenge_binhex buffer\n");
+		goto out;
+	}
+	/*
+	 * Extract CHAP_N.
+	 */
+	if (extract_param(nr_in_ptr, "CHAP_N", MAX_CHAP_N_SIZE, chap_n,
+				&type) < 0) {
+		pr_err("Could not find CHAP_N.\n");
+		goto out;
+	}
+	if (type == HEX) {
+		pr_err("CHAP_N is not in text format.\n");
+		goto out;
+	}
+
+	if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) {
+		pr_err("CHAP_N values do not match!\n");
+		goto out;
+	}
+	pr_debug("[server] Got CHAP_N=%s\n", chap_n);
+	/*
+	 * Extract CHAP_R.
+	 */
+	if (extract_param(nr_in_ptr, "CHAP_R", MAX_RESPONSE_LENGTH, chap_r,
+				&type) < 0) {
+		pr_err("Could not find CHAP_R.\n");
+		goto out;
+	}
+	if (type != HEX) {
+		pr_err("CHAP_R is not in hex format.\n");
+		goto out;
+	}
+
+	pr_debug("[server] Got CHAP_R=%s\n", chap_r);
+	chap_string_to_hex(client_digest, chap_r, strlen(chap_r));
+
+	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		pr_err("Unable to allocate struct crypto_hash\n");
+		goto out;
+	}
+	desc.tfm = tfm;
+	desc.flags = 0;
+
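+	/*
+	 * Per RFC 1994, the expected response is MD5(Identifier || secret
+	 * || challenge); hash the three pieces in that order and compare
+	 * the result against the decoded CHAP_R client digest.
+	 */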
+	ret = crypto_hash_init(&desc);
+	if (ret < 0) {
+		pr_err("crypto_hash_init() failed\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	sg_init_one(&sg, (void *)&chap->id, 1);
+	ret = crypto_hash_update(&desc, &sg, 1);
+	if (ret < 0) {
+		pr_err("crypto_hash_update() failed for id\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	sg_init_one(&sg, (void *)&auth->password, strlen(auth->password));
+	ret = crypto_hash_update(&desc, &sg, strlen(auth->password));
+	if (ret < 0) {
+		pr_err("crypto_hash_update() failed for password\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	sg_init_one(&sg, (void *)chap->challenge, CHAP_CHALLENGE_LENGTH);
+	ret = crypto_hash_update(&desc, &sg, CHAP_CHALLENGE_LENGTH);
+	if (ret < 0) {
+		pr_err("crypto_hash_update() failed for challenge\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	ret = crypto_hash_final(&desc, server_digest);
+	if (ret < 0) {
+		pr_err("crypto_hash_final() failed for server digest\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+	crypto_free_hash(tfm);
+
+	chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE);
+	pr_debug("[server] MD5 Server Digest: %s\n", response);
+
+	if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) {
+		pr_debug("[server] MD5 Digests do not match!\n\n");
+		goto out;
+	} else
+		pr_debug("[server] MD5 Digests match, CHAP authentication
+				" successful.\n\n");
+	/*
+	 * One way authentication has succeeded, return now if mutual
+	 * authentication is not enabled.
+	 */
+	if (!auth->authenticate_target) {
+		kfree(challenge);
+		kfree(challenge_binhex);
+		return 0;
+	}
+	/*
+	 * Get CHAP_I.
+	 */
+	if (extract_param(nr_in_ptr, "CHAP_I", 10, identifier, &type) < 0) {
+		pr_err("Could not find CHAP_I.\n");
+		goto out;
+	}
+
+	if (type == HEX)
+		id = (unsigned char)simple_strtoul((char *)&identifier[2],
+					&endptr, 0);
+	else
+		id = (unsigned char)simple_strtoul(identifier, &endptr, 0);
+	/*
+	 * RFC 1994 says the Identifier is no more than one octet (8 bits).
+	 */
+	pr_debug("[server] Got CHAP_I=%d\n", id);
+	/*
+	 * Get CHAP_C.
+	 */
+	if (extract_param(nr_in_ptr, "CHAP_C", CHAP_CHALLENGE_STR_LEN,
+			challenge, &type) < 0) {
+		pr_err("Could not find CHAP_C.\n");
+		goto out;
+	}
+
+	if (type != HEX) {
+		pr_err("CHAP_C is not in hex format.\n");
+		goto out;
+	}
+	pr_debug("[server] Got CHAP_C=%s\n", challenge);
+	challenge_len = chap_string_to_hex(challenge_binhex, challenge,
+				strlen(challenge));
+	if (!challenge_len) {
+		pr_err("Unable to convert incoming challenge\n");
+		goto out;
+	}
+	/*
+	 * Generate CHAP_N and CHAP_R for mutual authentication.
+	 */
+	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		pr_err("Unable to allocate struct crypto_hash\n");
+		goto out;
+	}
+	desc.tfm = tfm;
+	desc.flags = 0;
+
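+	/*
+	 * For mutual authentication the target's response is
+	 * MD5(CHAP_I || mutual secret || initiator's CHAP_C), sent back
+	 * to the initiator as CHAP_R below.
+	 */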
+	ret = crypto_hash_init(&desc);
+	if (ret < 0) {
+		pr_err("crypto_hash_init() failed\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	sg_init_one(&sg, (void *)&id, 1);
+	ret = crypto_hash_update(&desc, &sg, 1);
+	if (ret < 0) {
+		pr_err("crypto_hash_update() failed for id\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	sg_init_one(&sg, (void *)auth->password_mutual,
+				strlen(auth->password_mutual));
+	ret = crypto_hash_update(&desc, &sg, strlen(auth->password_mutual));
+	if (ret < 0) {
+		pr_err("crypto_hash_update() failed for"
+				" password_mutual\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+	/*
+	 * Hash the received challenge, already converted to binary hex above.
+	 */
+	sg_init_one(&sg, (void *)challenge_binhex, challenge_len);
+	ret = crypto_hash_update(&desc, &sg, challenge_len);
+	if (ret < 0) {
+		pr_err("crypto_hash_update() failed for ma challenge\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+
+	ret = crypto_hash_final(&desc, digest);
+	if (ret < 0) {
+		pr_err("crypto_hash_final() failed for ma digest\n");
+		crypto_free_hash(tfm);
+		goto out;
+	}
+	crypto_free_hash(tfm);
+	/*
+	 * Generate CHAP_N and CHAP_R.
+	 */
+	*nr_out_len = sprintf(nr_out_ptr, "CHAP_N=%s", auth->userid_mutual);
+	*nr_out_len += 1;
+	pr_debug("[server] Sending CHAP_N=%s\n", auth->userid_mutual);
+	/*
+	 * Convert the response from binary hex to ASCII hex.
+	 */
+	chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE);
+	*nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s",
+			response);
+	*nr_out_len += 1;
+	pr_debug("[server] Sending CHAP_R=0x%s\n", response);
+	auth_ret = 0;
+out:
+	kfree(challenge);
+	kfree(challenge_binhex);
+	return auth_ret;
+}
+
+static int chap_got_response(
+	struct iscsi_conn *conn,
+	struct iscsi_node_auth *auth,
+	char *nr_in_ptr,
+	char *nr_out_ptr,
+	unsigned int *nr_out_len)
+{
+	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+	switch (chap->digest_type) {
+	case CHAP_DIGEST_MD5:
+		if (chap_server_compute_md5(conn, auth, nr_in_ptr,
+				nr_out_ptr, nr_out_len) < 0)
+			return -1;
+		return 0;
+	default:
+		pr_err("Unknown CHAP digest type %d!\n",
+				chap->digest_type);
+		return -1;
+	}
+}
+
+u32 chap_main_loop(
+	struct iscsi_conn *conn,
+	struct iscsi_node_auth *auth,
+	char *in_text,
+	char *out_text,
+	int *in_len,
+	int *out_len)
+{
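+	/*
+	 * Return convention (assumed from the negotiation code in
+	 * iscsi_target_nego.c): 0 = more exchanges needed, 1 =
+	 * authentication complete, 2 = authentication failed.
+	 */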
+	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+	if (!chap) {
+		chap = chap_server_open(conn, auth, in_text, out_text, out_len);
+		if (!chap)
+			return 2;
+		chap->chap_state = CHAP_STAGE_SERVER_AIC;
+		return 0;
+	} else if (chap->chap_state == CHAP_STAGE_SERVER_AIC) {
+		convert_null_to_semi(in_text, *in_len);
+		if (chap_got_response(conn, auth, in_text, out_text,
+				out_len) < 0) {
+			chap_close(conn);
+			return 2;
+		}
+		if (auth->authenticate_target)
+			chap->chap_state = CHAP_STAGE_SERVER_NR;
+		else
+			*out_len = 0;
+		chap_close(conn);
+		return 1;
+	}
+
+	return 2;
+}
diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h
new file mode 100644
index 0000000..2f463c0
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_auth.h
@@ -0,0 +1,31 @@
+#ifndef _ISCSI_CHAP_H_
+#define _ISCSI_CHAP_H_
+
+#define CHAP_DIGEST_MD5		5
+#define CHAP_DIGEST_SHA		6
+
+#define CHAP_CHALLENGE_LENGTH	16
+#define CHAP_CHALLENGE_STR_LEN	4096
+#define MAX_RESPONSE_LENGTH	64	/* sufficient for MD5 */
+#define	MAX_CHAP_N_SIZE		512
+
+#define MD5_SIGNATURE_SIZE	16	/* 16 bytes in a MD5 message digest */
+
+#define CHAP_STAGE_CLIENT_A	1
+#define CHAP_STAGE_SERVER_AIC	2
+#define CHAP_STAGE_CLIENT_NR	3
+#define CHAP_STAGE_CLIENT_NRIC	4
+#define CHAP_STAGE_SERVER_NR	5
+
+extern u32 chap_main_loop(struct iscsi_conn *, struct iscsi_node_auth *, char *, char *,
+				int *, int *);
+
+struct iscsi_chap {
+	unsigned char	digest_type;
+	unsigned char	id;
+	unsigned char	challenge[CHAP_CHALLENGE_LENGTH];
+	unsigned int	authenticate_target;
+	unsigned int	chap_state;
+} ____cacheline_aligned;
+
+#endif   /*** _ISCSI_CHAP_H_ ***/
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
new file mode 100644
index 0000000..32bb92c
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -0,0 +1,1882 @@
+/*******************************************************************************
+ * This file contains the configfs implementation for iSCSI Target mode
+ * from the LIO-Target Project.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ****************************************************************************/
+
+#include <linux/configfs.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_fabric_configfs.h>
+#include <target/target_core_fabric_lib.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/configfs_macros.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_nodeattrib.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_stat.h"
+#include "iscsi_target_configfs.h"
+
+struct target_fabric_configfs *lio_target_fabric_configfs;
+
+struct lio_target_configfs_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(void *, char *);
+	ssize_t (*store)(void *, const char *, size_t);
+};
+
+struct iscsi_portal_group *lio_get_tpg_from_tpg_item(
+	struct config_item *item,
+	struct iscsi_tiqn **tiqn_out)
+{
+	struct se_portal_group *se_tpg = container_of(to_config_group(item),
+					struct se_portal_group, tpg_group);
+	struct iscsi_portal_group *tpg =
+			(struct iscsi_portal_group *)se_tpg->se_tpg_fabric_ptr;
+	int ret;
+
+	if (!tpg) {
+		pr_err("Unable to locate struct iscsi_portal_group "
+			"pointer\n");
+		return NULL;
+	}
+	ret = iscsit_get_tpg(tpg);
+	if (ret < 0)
+		return NULL;
+
+	*tiqn_out = tpg->tpg_tiqn;
+	return tpg;
+}
+
+/* Start items for lio_target_portal_cit */
+
+static ssize_t lio_target_np_show_sctp(
+	struct se_tpg_np *se_tpg_np,
+	char *page)
+{
+	struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np,
+				struct iscsi_tpg_np, se_tpg_np);
+	struct iscsi_tpg_np *tpg_np_sctp;
+	ssize_t rb;
+
+	tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP);
+	if (tpg_np_sctp)
+		rb = sprintf(page, "1\n");
+	else
+		rb = sprintf(page, "0\n");
+
+	return rb;
+}
+
+static ssize_t lio_target_np_store_sctp(
+	struct se_tpg_np *se_tpg_np,
+	const char *page,
+	size_t count)
+{
+	struct iscsi_np *np;
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np,
+				struct iscsi_tpg_np, se_tpg_np);
+	struct iscsi_tpg_np *tpg_np_sctp = NULL;
+	char *endptr;
+	u32 op;
+	int ret;
+
+	op = simple_strtoul(page, &endptr, 0);
+	if ((op != 1) && (op != 0)) {
+		pr_err("Illegal value for tpg_enable: %u\n", op);
+		return -EINVAL;
+	}
+	np = tpg_np->tpg_np;
+	if (!np) {
+		pr_err("Unable to locate struct iscsi_np from"
+				" struct iscsi_tpg_np\n");
+		return -EINVAL;
+	}
+
+	tpg = tpg_np->tpg;
+	if (iscsit_get_tpg(tpg) < 0)
+		return -EINVAL;
+
+	if (op) {
+		/*
+		 * Use existing np->np_sockaddr for SCTP network portal reference
+		 */
+		tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr,
+					np->np_ip, tpg_np, ISCSI_SCTP_TCP);
+		if (!tpg_np_sctp || IS_ERR(tpg_np_sctp))
+			goto out;
+	} else {
+		tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP);
+		if (!tpg_np_sctp)
+			goto out;
+
+		ret = iscsit_tpg_del_network_portal(tpg, tpg_np_sctp);
+		if (ret < 0)
+			goto out;
+	}
+
+	iscsit_put_tpg(tpg);
+	return count;
+out:
+	iscsit_put_tpg(tpg);
+	return -EINVAL;
+}
+
+TF_NP_BASE_ATTR(lio_target, sctp, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_portal_attrs[] = {
+	&lio_target_np_sctp.attr,
+	NULL,
+};
+
+/* Stop items for lio_target_portal_cit */
+
+/* Start items for lio_target_np_cit */
+
+#define MAX_PORTAL_LEN		256
+
+struct se_tpg_np *lio_target_call_addnptotpg(
+	struct se_portal_group *se_tpg,
+	struct config_group *group,
+	const char *name)
+{
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tpg_np *tpg_np;
+	char *str, *str2, *ip_str, *port_str;
+	struct __kernel_sockaddr_storage sockaddr;
+	struct sockaddr_in *sock_in;
+	struct sockaddr_in6 *sock_in6;
+	unsigned long port;
+	int ret;
+	char buf[MAX_PORTAL_LEN + 1];
+
+	if (strlen(name) > MAX_PORTAL_LEN) {
+		pr_err("strlen(name): %d exceeds MAX_PORTAL_LEN: %d\n",
+			(int)strlen(name), MAX_PORTAL_LEN);
+		return ERR_PTR(-EOVERFLOW);
+	}
+	memset(buf, 0, MAX_PORTAL_LEN + 1);
+	snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+
+	memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
+
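+	/*
+	 * Portal strings arrive as "[<ipv6>]:<port>" or "<ipv4>:<port>";
+	 * a leading "[" selects the IPv6 parsing path below.
+	 */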
+	str = strstr(buf, "[");
+	if (str) {
+		const char *end;
+
+		str2 = strstr(str, "]");
+		if (!str2) {
+			pr_err("Unable to locate trailing \"]\""
+				" in IPv6 iSCSI network portal address\n");
+			return ERR_PTR(-EINVAL);
+		}
+		str++; /* Skip over leading "[" */
+		*str2 = '\0'; /* Terminate the IPv6 address */
+		str2++; /* Skip over the "]" */
+		port_str = strstr(str2, ":");
+		if (!port_str) {
+			pr_err("Unable to locate \":port\""
+				" in IPv6 iSCSI network portal address\n");
+			return ERR_PTR(-EINVAL);
+		}
+		*port_str = '\0'; /* Terminate string for IP */
+		port_str++; /* Skip over ":" */
+
+		ret = strict_strtoul(port_str, 0, &port);
+		if (ret < 0) {
+			pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+			return ERR_PTR(ret);
+		}
+		sock_in6 = (struct sockaddr_in6 *)&sockaddr;
+		sock_in6->sin6_family = AF_INET6;
+		sock_in6->sin6_port = htons((unsigned short)port);
+		ret = in6_pton(str, IPV6_ADDRESS_SPACE,
+				(void *)&sock_in6->sin6_addr.in6_u, -1, &end);
+		if (ret <= 0) {
+			pr_err("in6_pton returned: %d\n", ret);
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		str = ip_str = &buf[0];
+		port_str = strstr(ip_str, ":");
+		if (!port_str) {
+			pr_err("Unable to locate \":port\""
+				" in IPv4 iSCSI network portal address\n");
+			return ERR_PTR(-EINVAL);
+		}
+		*port_str = '\0'; /* Terminate string for IP */
+		port_str++; /* Skip over ":" */
+
+		ret = strict_strtoul(port_str, 0, &port);
+		if (ret < 0) {
+			pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+			return ERR_PTR(ret);
+		}
+		sock_in = (struct sockaddr_in *)&sockaddr;
+		sock_in->sin_family = AF_INET;
+		sock_in->sin_port = htons((unsigned short)port);
+		sock_in->sin_addr.s_addr = in_aton(ip_str);
+	}
+	tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+	ret = iscsit_get_tpg(tpg);
+	if (ret < 0)
+		return ERR_PTR(-EINVAL);
+
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s TPGT: %hu"
+		" PORTAL: %s\n",
+		config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+		tpg->tpgt, name);
+	/*
+	 * Assume ISCSI_TCP by default.  Other network portals for other
+	 * iSCSI fabrics:
+	 *
+	 * Traditional iSCSI over SCTP (initial support)
+	 * iSER/TCP (TODO, hardware available)
+	 * iSER/SCTP (TODO, software emulation with osc-iwarp)
+	 * iSER/IB (TODO, hardware available)
+	 *
+	 * can be enabled with attributes under
+	 * /sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/
+	 *
+	 */
+	tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL,
+				ISCSI_TCP);
+	if (IS_ERR(tpg_np)) {
+		iscsit_put_tpg(tpg);
+		return ERR_PTR(PTR_ERR(tpg_np));
+	}
+	pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
+
+	iscsit_put_tpg(tpg);
+	return &tpg_np->se_tpg_np;
+}
+
+static void lio_target_call_delnpfromtpg(
+	struct se_tpg_np *se_tpg_np)
+{
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tpg_np *tpg_np;
+	struct se_portal_group *se_tpg;
+	int ret;
+
+	tpg_np = container_of(se_tpg_np, struct iscsi_tpg_np, se_tpg_np);
+	tpg = tpg_np->tpg;
+	ret = iscsit_get_tpg(tpg);
+	if (ret < 0)
+		return;
+
+	se_tpg = &tpg->tpg_se_tpg;
+	pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu"
+		" PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+		tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port);
+
+	ret = iscsit_tpg_del_network_portal(tpg, tpg_np);
+	if (ret < 0)
+		goto out;
+
+	pr_debug("LIO_Target_ConfigFS: delnpfromtpg done!\n");
+out:
+	iscsit_put_tpg(tpg);
+}
+
+/* End items for lio_target_np_cit */
+
+/* Start items for lio_target_nacl_attrib_cit */
+
+#define DEF_NACL_ATTRIB(name)						\
+static ssize_t iscsi_nacl_attrib_show_##name(				\
+	struct se_node_acl *se_nacl,					\
+	char *page)							\
+{									\
+	struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \
+					se_node_acl);			\
+									\
+	return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name);	\
+}									\
+									\
+static ssize_t iscsi_nacl_attrib_store_##name(				\
+	struct se_node_acl *se_nacl,					\
+	const char *page,						\
+	size_t count)							\
+{									\
+	struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \
+					se_node_acl);			\
+	char *endptr;							\
+	u32 val;							\
+	int ret;							\
+									\
+	val = simple_strtoul(page, &endptr, 0);				\
+	ret = iscsit_na_##name(nacl, val);				\
+	if (ret < 0)							\
+		return ret;						\
+									\
+	return count;							\
+}
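+
+/*
+ * Sketch of what a single DEF_NACL_ATTRIB() invocation expands to,
+ * derived from the macro above (this comment generates no code):
+ *
+ *   DEF_NACL_ATTRIB(dataout_timeout);
+ *
+ * produces iscsi_nacl_attrib_show_dataout_timeout(), which prints
+ * ISCSI_NODE_ATTRIB(nacl)->dataout_timeout, and
+ * iscsi_nacl_attrib_store_dataout_timeout(), which parses the user
+ * buffer and applies the value via iscsit_na_dataout_timeout().
+ */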
+
+#define NACL_ATTR(_name, _mode) TF_NACL_ATTRIB_ATTR(iscsi, _name, _mode);
+/*
+ * Define iscsi_node_attrib_s_dataout_timeout
+ */
+DEF_NACL_ATTRIB(dataout_timeout);
+NACL_ATTR(dataout_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_dataout_timeout_retries
+ */
+DEF_NACL_ATTRIB(dataout_timeout_retries);
+NACL_ATTR(dataout_timeout_retries, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_default_erl
+ */
+DEF_NACL_ATTRIB(default_erl);
+NACL_ATTR(default_erl, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_nopin_timeout
+ */
+DEF_NACL_ATTRIB(nopin_timeout);
+NACL_ATTR(nopin_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_nopin_response_timeout
+ */
+DEF_NACL_ATTRIB(nopin_response_timeout);
+NACL_ATTR(nopin_response_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_datain_pdu_offsets
+ */
+DEF_NACL_ATTRIB(random_datain_pdu_offsets);
+NACL_ATTR(random_datain_pdu_offsets, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_datain_seq_offsets
+ */
+DEF_NACL_ATTRIB(random_datain_seq_offsets);
+NACL_ATTR(random_datain_seq_offsets, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_r2t_offsets
+ */
+DEF_NACL_ATTRIB(random_r2t_offsets);
+NACL_ATTR(random_r2t_offsets, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_nacl_attrib_attrs[] = {
+	&iscsi_nacl_attrib_dataout_timeout.attr,
+	&iscsi_nacl_attrib_dataout_timeout_retries.attr,
+	&iscsi_nacl_attrib_default_erl.attr,
+	&iscsi_nacl_attrib_nopin_timeout.attr,
+	&iscsi_nacl_attrib_nopin_response_timeout.attr,
+	&iscsi_nacl_attrib_random_datain_pdu_offsets.attr,
+	&iscsi_nacl_attrib_random_datain_seq_offsets.attr,
+	&iscsi_nacl_attrib_random_r2t_offsets.attr,
+	NULL,
+};
+
+/* End items for lio_target_nacl_attrib_cit */
+
+/* Start items for lio_target_nacl_auth_cit */
+
+#define __DEF_NACL_AUTH_STR(prefix, name, flags)			\
+static ssize_t __iscsi_##prefix##_show_##name(				\
+	struct iscsi_node_acl *nacl,					\
+	char *page)							\
+{									\
+	struct iscsi_node_auth *auth = &nacl->node_auth;		\
+									\
+	if (!capable(CAP_SYS_ADMIN))					\
+		return -EPERM;						\
+	return snprintf(page, PAGE_SIZE, "%s\n", auth->name);		\
+}									\
+									\
+static ssize_t __iscsi_##prefix##_store_##name(				\
+	struct iscsi_node_acl *nacl,					\
+	const char *page,						\
+	size_t count)							\
+{									\
+	struct iscsi_node_auth *auth = &nacl->node_auth;		\
+									\
+	if (!capable(CAP_SYS_ADMIN))					\
+		return -EPERM;						\
+									\
+	snprintf(auth->name, PAGE_SIZE, "%s", page);			\
+	if (!strncmp("NULL", auth->name, 4))				\
+		auth->naf_flags &= ~flags;				\
+	else								\
+		auth->naf_flags |= flags;				\
+									\
+	if ((auth->naf_flags & NAF_USERID_IN_SET) &&			\
+	    (auth->naf_flags & NAF_PASSWORD_IN_SET))			\
+		auth->authenticate_target = 1;				\
+	else								\
+		auth->authenticate_target = 0;				\
+									\
+	return count;							\
+}
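+
+/*
+ * Note on the store path above: writing the literal string "NULL"
+ * clears the attribute's naf_flags bit while any other value sets it,
+ * and authenticate_target flips on only once both NAF_USERID_IN_SET
+ * and NAF_PASSWORD_IN_SET are present.  An illustrative (hypothetical
+ * path) interaction:
+ *
+ *   echo joe  > .../acls/$INITIATOR_IQN/auth/userid   # sets NAF_USERID_SET
+ *   echo NULL > .../acls/$INITIATOR_IQN/auth/userid   # clears it again
+ */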
+
+#define __DEF_NACL_AUTH_INT(prefix, name)				\
+static ssize_t __iscsi_##prefix##_show_##name(				\
+	struct iscsi_node_acl *nacl,					\
+	char *page)							\
+{									\
+	struct iscsi_node_auth *auth = &nacl->node_auth;		\
+									\
+	if (!capable(CAP_SYS_ADMIN))					\
+		return -EPERM;						\
+									\
+	return snprintf(page, PAGE_SIZE, "%d\n", auth->name);		\
+}
+
+#define DEF_NACL_AUTH_STR(name, flags)					\
+	__DEF_NACL_AUTH_STR(nacl_auth, name, flags)			\
+static ssize_t iscsi_nacl_auth_show_##name(				\
+	struct se_node_acl *nacl,					\
+	char *page)							\
+{									\
+	return __iscsi_nacl_auth_show_##name(container_of(nacl,		\
+			struct iscsi_node_acl, se_node_acl), page);		\
+}									\
+static ssize_t iscsi_nacl_auth_store_##name(				\
+	struct se_node_acl *nacl,					\
+	const char *page,						\
+	size_t count)							\
+{									\
+	return __iscsi_nacl_auth_store_##name(container_of(nacl,	\
+			struct iscsi_node_acl, se_node_acl), page, count);	\
+}
+
+#define DEF_NACL_AUTH_INT(name)						\
+	__DEF_NACL_AUTH_INT(nacl_auth, name)				\
+static ssize_t iscsi_nacl_auth_show_##name(				\
+	struct se_node_acl *nacl,					\
+	char *page)							\
+{									\
+	return __iscsi_nacl_auth_show_##name(container_of(nacl,		\
+			struct iscsi_node_acl, se_node_acl), page);		\
+}
+
+#define AUTH_ATTR(_name, _mode)	TF_NACL_AUTH_ATTR(iscsi, _name, _mode);
+#define AUTH_ATTR_RO(_name) TF_NACL_AUTH_ATTR_RO(iscsi, _name);
+
+/*
+ * One-way authentication userid
+ */
+DEF_NACL_AUTH_STR(userid, NAF_USERID_SET);
+AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_NACL_AUTH_STR(password, NAF_PASSWORD_SET);
+AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_NACL_AUTH_INT(authenticate_target);
+AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication userid
+ */
+DEF_NACL_AUTH_STR(userid_mutual, NAF_USERID_IN_SET);
+AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR);
+/*
+ * Mutual authentication password
+ */
+DEF_NACL_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET);
+AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_nacl_auth_attrs[] = {
+	&iscsi_nacl_auth_userid.attr,
+	&iscsi_nacl_auth_password.attr,
+	&iscsi_nacl_auth_authenticate_target.attr,
+	&iscsi_nacl_auth_userid_mutual.attr,
+	&iscsi_nacl_auth_password_mutual.attr,
+	NULL,
+};
+
+/* End items for lio_target_nacl_auth_cit */
+
+/* Start items for lio_target_nacl_param_cit */
+
+#define DEF_NACL_PARAM(name)						\
+static ssize_t iscsi_nacl_param_show_##name(				\
+	struct se_node_acl *se_nacl,					\
+	char *page)							\
+{									\
+	struct iscsi_session *sess;					\
+	struct se_session *se_sess;					\
+	ssize_t rb;							\
+									\
+	spin_lock_bh(&se_nacl->nacl_sess_lock);				\
+	se_sess = se_nacl->nacl_sess;					\
+	if (!se_sess) {							\
+		rb = snprintf(page, PAGE_SIZE,				\
+			"No Active iSCSI Session\n");			\
+	} else {							\
+		sess = se_sess->fabric_sess_ptr;			\
+		rb = snprintf(page, PAGE_SIZE, "%u\n",			\
+			(u32)sess->sess_ops->name);			\
+	}								\
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);			\
+									\
+	return rb;							\
+}
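+
+/*
+ * These per-NodeACL parameter attributes are read-only views of the
+ * values negotiated for the currently active session.  A hypothetical
+ * read (path and value for illustration only):
+ *
+ *   cat .../acls/$INITIATOR_IQN/param/MaxBurstLength
+ *   262144
+ *
+ * while "No Active iSCSI Session" is returned when nacl_sess is NULL.
+ */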
+
+#define NACL_PARAM_ATTR(_name) TF_NACL_PARAM_ATTR_RO(iscsi, _name);
+
+DEF_NACL_PARAM(MaxConnections);
+NACL_PARAM_ATTR(MaxConnections);
+
+DEF_NACL_PARAM(InitialR2T);
+NACL_PARAM_ATTR(InitialR2T);
+
+DEF_NACL_PARAM(ImmediateData);
+NACL_PARAM_ATTR(ImmediateData);
+
+DEF_NACL_PARAM(MaxBurstLength);
+NACL_PARAM_ATTR(MaxBurstLength);
+
+DEF_NACL_PARAM(FirstBurstLength);
+NACL_PARAM_ATTR(FirstBurstLength);
+
+DEF_NACL_PARAM(DefaultTime2Wait);
+NACL_PARAM_ATTR(DefaultTime2Wait);
+
+DEF_NACL_PARAM(DefaultTime2Retain);
+NACL_PARAM_ATTR(DefaultTime2Retain);
+
+DEF_NACL_PARAM(MaxOutstandingR2T);
+NACL_PARAM_ATTR(MaxOutstandingR2T);
+
+DEF_NACL_PARAM(DataPDUInOrder);
+NACL_PARAM_ATTR(DataPDUInOrder);
+
+DEF_NACL_PARAM(DataSequenceInOrder);
+NACL_PARAM_ATTR(DataSequenceInOrder);
+
+DEF_NACL_PARAM(ErrorRecoveryLevel);
+NACL_PARAM_ATTR(ErrorRecoveryLevel);
+
+static struct configfs_attribute *lio_target_nacl_param_attrs[] = {
+	&iscsi_nacl_param_MaxConnections.attr,
+	&iscsi_nacl_param_InitialR2T.attr,
+	&iscsi_nacl_param_ImmediateData.attr,
+	&iscsi_nacl_param_MaxBurstLength.attr,
+	&iscsi_nacl_param_FirstBurstLength.attr,
+	&iscsi_nacl_param_DefaultTime2Wait.attr,
+	&iscsi_nacl_param_DefaultTime2Retain.attr,
+	&iscsi_nacl_param_MaxOutstandingR2T.attr,
+	&iscsi_nacl_param_DataPDUInOrder.attr,
+	&iscsi_nacl_param_DataSequenceInOrder.attr,
+	&iscsi_nacl_param_ErrorRecoveryLevel.attr,
+	NULL,
+};
+
+/* End items for lio_target_nacl_param_cit */
+
+/* Start items for lio_target_acl_cit */
+
+static ssize_t lio_target_nacl_show_info(
+	struct se_node_acl *se_nacl,
+	char *page)
+{
+	struct iscsi_session *sess;
+	struct iscsi_conn *conn;
+	struct se_session *se_sess;
+	ssize_t rb = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (!se_sess) {
+		rb += sprintf(page+rb, "No active iSCSI Session for Initiator"
+			" Endpoint: %s\n", se_nacl->initiatorname);
+	} else {
+		sess = se_sess->fabric_sess_ptr;
+
+		if (sess->sess_ops->InitiatorName)
+			rb += sprintf(page+rb, "InitiatorName: %s\n",
+				sess->sess_ops->InitiatorName);
+		if (sess->sess_ops->InitiatorAlias)
+			rb += sprintf(page+rb, "InitiatorAlias: %s\n",
+				sess->sess_ops->InitiatorAlias);
+
+		rb += sprintf(page+rb, "LIO Session ID: %u   "
+			"ISID: 0x%02x %02x %02x %02x %02x %02x  "
+			"TSIH: %hu  ", sess->sid,
+			sess->isid[0], sess->isid[1], sess->isid[2],
+			sess->isid[3], sess->isid[4], sess->isid[5],
+			sess->tsih);
+		rb += sprintf(page+rb, "SessionType: %s\n",
+				(sess->sess_ops->SessionType) ?
+				"Discovery" : "Normal");
+		rb += sprintf(page+rb, "Session State: ");
+		switch (sess->session_state) {
+		case TARG_SESS_STATE_FREE:
+			rb += sprintf(page+rb, "TARG_SESS_FREE\n");
+			break;
+		case TARG_SESS_STATE_ACTIVE:
+			rb += sprintf(page+rb, "TARG_SESS_STATE_ACTIVE\n");
+			break;
+		case TARG_SESS_STATE_LOGGED_IN:
+			rb += sprintf(page+rb, "TARG_SESS_STATE_LOGGED_IN\n");
+			break;
+		case TARG_SESS_STATE_FAILED:
+			rb += sprintf(page+rb, "TARG_SESS_STATE_FAILED\n");
+			break;
+		case TARG_SESS_STATE_IN_CONTINUE:
+			rb += sprintf(page+rb, "TARG_SESS_STATE_IN_CONTINUE\n");
+			break;
+		default:
+			rb += sprintf(page+rb, "ERROR: Unknown Session"
+					" State!\n");
+			break;
+		}
+
+		rb += sprintf(page+rb, "---------------------[iSCSI Session"
+				" Values]-----------------------\n");
+		rb += sprintf(page+rb, "  CmdSN/WR  :  CmdSN/WC  :  ExpCmdSN"
+				"  :  MaxCmdSN  :     ITT    :     TTT\n");
+		rb += sprintf(page+rb, " 0x%08x   0x%08x   0x%08x   0x%08x"
+				"   0x%08x   0x%08x\n",
+			sess->cmdsn_window,
+			(sess->max_cmd_sn - sess->exp_cmd_sn) + 1,
+			sess->exp_cmd_sn, sess->max_cmd_sn,
+			sess->init_task_tag, sess->targ_xfer_tag);
+		rb += sprintf(page+rb, "----------------------[iSCSI"
+				" Connections]-------------------------\n");
+
+		spin_lock(&sess->conn_lock);
+		list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+			rb += sprintf(page+rb, "CID: %hu  Connection"
+					" State: ", conn->cid);
+			switch (conn->conn_state) {
+			case TARG_CONN_STATE_FREE:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_FREE\n");
+				break;
+			case TARG_CONN_STATE_XPT_UP:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_XPT_UP\n");
+				break;
+			case TARG_CONN_STATE_IN_LOGIN:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_IN_LOGIN\n");
+				break;
+			case TARG_CONN_STATE_LOGGED_IN:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_LOGGED_IN\n");
+				break;
+			case TARG_CONN_STATE_IN_LOGOUT:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_IN_LOGOUT\n");
+				break;
+			case TARG_CONN_STATE_LOGOUT_REQUESTED:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_LOGOUT_REQUESTED\n");
+				break;
+			case TARG_CONN_STATE_CLEANUP_WAIT:
+				rb += sprintf(page+rb,
+					"TARG_CONN_STATE_CLEANUP_WAIT\n");
+				break;
+			default:
+				rb += sprintf(page+rb,
+					"ERROR: Unknown Connection State!\n");
+				break;
+			}
+
+			rb += sprintf(page+rb, "   Address %s %s", conn->login_ip,
+				(conn->network_transport == ISCSI_TCP) ?
+				"TCP" : "SCTP");
+			rb += sprintf(page+rb, "  StatSN: 0x%08x\n",
+				conn->stat_sn);
+		}
+		spin_unlock(&sess->conn_lock);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return rb;
+}
+
+TF_NACL_BASE_ATTR_RO(lio_target, info);
+
+static ssize_t lio_target_nacl_show_cmdsn_depth(
+	struct se_node_acl *se_nacl,
+	char *page)
+{
+	return sprintf(page, "%u\n", se_nacl->queue_depth);
+}
+
+static ssize_t lio_target_nacl_store_cmdsn_depth(
+	struct se_node_acl *se_nacl,
+	const char *page,
+	size_t count)
+{
+	struct se_portal_group *se_tpg = se_nacl->se_tpg;
+	struct iscsi_portal_group *tpg = container_of(se_tpg,
+			struct iscsi_portal_group, tpg_se_tpg);
+	struct config_item *acl_ci, *tpg_ci, *wwn_ci;
+	char *endptr;
+	u32 cmdsn_depth = 0;
+	int ret;
+
+	cmdsn_depth = simple_strtoul(page, &endptr, 0);
+	if (cmdsn_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) {
+		pr_err("Passed cmdsn_depth: %u exceeds"
+			" TA_DEFAULT_CMDSN_DEPTH_MAX: %u\n", cmdsn_depth,
+			TA_DEFAULT_CMDSN_DEPTH_MAX);
+		return -EINVAL;
+	}
+	acl_ci = &se_nacl->acl_group.cg_item;
+	if (!acl_ci) {
+		pr_err("Unable to locatel acl_ci\n");
+		return -EINVAL;
+	}
+	tpg_ci = &acl_ci->ci_parent->ci_group->cg_item;
+	if (!tpg_ci) {
+		pr_err("Unable to locate tpg_ci\n");
+		return -EINVAL;
+	}
+	wwn_ci = &tpg_ci->ci_group->cg_item;
+	if (!wwn_ci) {
+		pr_err("Unable to locate config_item wwn_ci\n");
+		return -EINVAL;
+	}
+
+	if (iscsit_get_tpg(tpg) < 0)
+		return -EINVAL;
+	/*
+	 * iscsit_tpg_set_initiator_node_queue_depth() assumes force=1
+	 */
+	ret = iscsit_tpg_set_initiator_node_queue_depth(tpg,
+				config_item_name(acl_ci), cmdsn_depth, 1);
+
+	pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for"
+		"InitiatorName: %s\n", config_item_name(wwn_ci),
+		config_item_name(tpg_ci), cmdsn_depth,
+		config_item_name(acl_ci));
+
+	iscsit_put_tpg(tpg);
+	return (!ret) ? count : (ssize_t)ret;
+}
+
+TF_NACL_BASE_ATTR(lio_target, cmdsn_depth, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_initiator_attrs[] = {
+	&lio_target_nacl_info.attr,
+	&lio_target_nacl_cmdsn_depth.attr,
+	NULL,
+};
+
+static struct se_node_acl *lio_tpg_alloc_fabric_acl(
+	struct se_portal_group *se_tpg)
+{
+	struct iscsi_node_acl *acl;
+
+	acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL);
+	if (!acl) {
+		pr_err("Unable to allocate memory for struct iscsi_node_acl\n");
+		return NULL;
+	}
+
+	return &acl->se_node_acl;
+}
+
+static struct se_node_acl *lio_target_make_nodeacl(
+	struct se_portal_group *se_tpg,
+	struct config_group *group,
+	const char *name)
+{
+	struct config_group *stats_cg;
+	struct iscsi_node_acl *acl;
+	struct se_node_acl *se_nacl_new, *se_nacl;
+	struct iscsi_portal_group *tpg = container_of(se_tpg,
+			struct iscsi_portal_group, tpg_se_tpg);
+	u32 cmdsn_depth;
+
+	se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg);
+	if (!se_nacl_new)
+		return ERR_PTR(-ENOMEM);
+
+	acl = container_of(se_nacl_new, struct iscsi_node_acl,
+				se_node_acl);
+
+	cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
+	/*
+	 * se_nacl_new may be released by core_tpg_add_initiator_node_acl()
+	 * when converting a NodeACL from demo mode -> explicit
+	 */
+	se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,
+				name, cmdsn_depth);
+	if (IS_ERR(se_nacl))
+		return se_nacl;
+
+	stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+
+	stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 2,
+				GFP_KERNEL);
+	if (!stats_cg->default_groups) {
+		pr_err("Unable to allocate memory for"
+				" stats_cg->default_groups\n");
+		core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1);
+		kfree(acl);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group;
+	stats_cg->default_groups[1] = NULL;
+	config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group,
+			"iscsi_sess_stats", &iscsi_stat_sess_cit);
+
+	return se_nacl;
+}
+
+static void lio_target_drop_nodeacl(
+	struct se_node_acl *se_nacl)
+{
+	struct se_portal_group *se_tpg = se_nacl->se_tpg;
+	struct iscsi_node_acl *acl = container_of(se_nacl,
+			struct iscsi_node_acl, se_node_acl);
+	struct config_item *df_item;
+	struct config_group *stats_cg;
+	int i;
+
+	stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+	for (i = 0; stats_cg->default_groups[i]; i++) {
+		df_item = &stats_cg->default_groups[i]->cg_item;
+		stats_cg->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(stats_cg->default_groups);
+
+	core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1);
+	kfree(acl);
+}
+
+/* End items for lio_target_acl_cit */
+
+/* Start items for lio_target_tpg_attrib_cit */
+
+#define DEF_TPG_ATTRIB(name)						\
+									\
+static ssize_t iscsi_tpg_attrib_show_##name(				\
+	struct se_portal_group *se_tpg,				\
+	char *page)							\
+{									\
+	struct iscsi_portal_group *tpg = container_of(se_tpg,		\
+			struct iscsi_portal_group, tpg_se_tpg);	\
+	ssize_t rb;							\
+									\
+	if (iscsit_get_tpg(tpg) < 0)					\
+		return -EINVAL;						\
+									\
+	rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name);	\
+	iscsit_put_tpg(tpg);						\
+	return rb;							\
+}									\
+									\
+static ssize_t iscsi_tpg_attrib_store_##name(				\
+	struct se_portal_group *se_tpg,				\
+	const char *page,						\
+	size_t count)							\
+{									\
+	struct iscsi_portal_group *tpg = container_of(se_tpg,		\
+			struct iscsi_portal_group, tpg_se_tpg);	\
+	char *endptr;							\
+	u32 val;							\
+	int ret;							\
+									\
+	if (iscsit_get_tpg(tpg) < 0)					\
+		return -EINVAL;						\
+									\
+	val = simple_strtoul(page, &endptr, 0);				\
+	ret = iscsit_ta_##name(tpg, val);				\
+	if (ret < 0)							\
+		goto out;						\
+									\
+	iscsit_put_tpg(tpg);						\
+	return count;							\
+out:									\
+	iscsit_put_tpg(tpg);						\
+	return ret;							\
+}
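+
+/*
+ * Each DEF_TPG_ATTRIB() pairs a show routine printing the current
+ * ISCSI_TPG_ATTRIB(tpg)->name value with a store routine that funnels
+ * the parsed value through the matching iscsit_ta_##name() setter;
+ * e.g. DEF_TPG_ATTRIB(authentication) below ends up calling
+ * iscsit_ta_authentication(tpg, val) on write.
+ */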
+
+#define TPG_ATTR(_name, _mode) TF_TPG_ATTRIB_ATTR(iscsi, _name, _mode);
+
+/*
+ * Define iscsi_tpg_attrib_s_authentication
+ */
+DEF_TPG_ATTRIB(authentication);
+TPG_ATTR(authentication, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_login_timeout
+ */
+DEF_TPG_ATTRIB(login_timeout);
+TPG_ATTR(login_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_netif_timeout
+ */
+DEF_TPG_ATTRIB(netif_timeout);
+TPG_ATTR(netif_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_generate_node_acls
+ */
+DEF_TPG_ATTRIB(generate_node_acls);
+TPG_ATTR(generate_node_acls, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_default_cmdsn_depth
+ */
+DEF_TPG_ATTRIB(default_cmdsn_depth);
+TPG_ATTR(default_cmdsn_depth, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_cache_dynamic_acls
+ */
+DEF_TPG_ATTRIB(cache_dynamic_acls);
+TPG_ATTR(cache_dynamic_acls, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_demo_mode_write_protect
+ */
+DEF_TPG_ATTRIB(demo_mode_write_protect);
+TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_prod_mode_write_protect
+ */
+DEF_TPG_ATTRIB(prod_mode_write_protect);
+TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
+	&iscsi_tpg_attrib_authentication.attr,
+	&iscsi_tpg_attrib_login_timeout.attr,
+	&iscsi_tpg_attrib_netif_timeout.attr,
+	&iscsi_tpg_attrib_generate_node_acls.attr,
+	&iscsi_tpg_attrib_default_cmdsn_depth.attr,
+	&iscsi_tpg_attrib_cache_dynamic_acls.attr,
+	&iscsi_tpg_attrib_demo_mode_write_protect.attr,
+	&iscsi_tpg_attrib_prod_mode_write_protect.attr,
+	NULL,
+};
+
+/* End items for lio_target_tpg_attrib_cit */
+
+/* Start items for lio_target_tpg_param_cit */
+
+#define DEF_TPG_PARAM(name)						\
+static ssize_t iscsi_tpg_param_show_##name(				\
+	struct se_portal_group *se_tpg,					\
+	char *page)							\
+{									\
+	struct iscsi_portal_group *tpg = container_of(se_tpg,		\
+			struct iscsi_portal_group, tpg_se_tpg);		\
+	struct iscsi_param *param;					\
+	ssize_t rb;							\
+									\
+	if (iscsit_get_tpg(tpg) < 0)					\
+		return -EINVAL;						\
+									\
+	param = iscsi_find_param_from_key(__stringify(name),		\
+				tpg->param_list);			\
+	if (!param) {							\
+		iscsit_put_tpg(tpg);					\
+		return -EINVAL;						\
+	}								\
+	rb = snprintf(page, PAGE_SIZE, "%s\n", param->value);		\
+									\
+	iscsit_put_tpg(tpg);						\
+	return rb;							\
+}									\
+static ssize_t iscsi_tpg_param_store_##name(				\
+	struct se_portal_group *se_tpg,				\
+	const char *page,						\
+	size_t count)							\
+{									\
+	struct iscsi_portal_group *tpg = container_of(se_tpg,		\
+			struct iscsi_portal_group, tpg_se_tpg);		\
+	char *buf;							\
+	int ret;							\
+									\
+	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);				\
+	if (!buf)							\
+		return -ENOMEM;						\
+	snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page);	\
+	buf[strlen(buf)-1] = '\0'; /* Kill newline */			\
+									\
+	if (iscsit_get_tpg(tpg) < 0) {					\
+		kfree(buf);						\
+		return -EINVAL;						\
+	}								\
+									\
+	ret = iscsi_change_param_value(buf, tpg->param_list, 1);	\
+	if (ret < 0)							\
+		goto out;						\
+									\
+	kfree(buf);							\
+	iscsit_put_tpg(tpg);						\
+	return count;							\
+out:									\
+	kfree(buf);							\
+	iscsit_put_tpg(tpg);						\
+	return -EINVAL;						\
+}
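+
+/*
+ * Sketch of the store path above: a write of "CRC32C\n" to the
+ * HeaderDigest attribute is rewritten into the iSCSI key=value form
+ * "HeaderDigest=CRC32C" (trailing newline stripped) before being
+ * handed to iscsi_change_param_value() against the TPG's param_list.
+ */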
+
+#define TPG_PARAM_ATTR(_name, _mode) TF_TPG_PARAM_ATTR(iscsi, _name, _mode);
+
+DEF_TPG_PARAM(AuthMethod);
+TPG_PARAM_ATTR(AuthMethod, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(HeaderDigest);
+TPG_PARAM_ATTR(HeaderDigest, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataDigest);
+TPG_PARAM_ATTR(DataDigest, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxConnections);
+TPG_PARAM_ATTR(MaxConnections, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(TargetAlias);
+TPG_PARAM_ATTR(TargetAlias, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(InitialR2T);
+TPG_PARAM_ATTR(InitialR2T, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(ImmediateData);
+TPG_PARAM_ATTR(ImmediateData, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxRecvDataSegmentLength);
+TPG_PARAM_ATTR(MaxRecvDataSegmentLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxBurstLength);
+TPG_PARAM_ATTR(MaxBurstLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(FirstBurstLength);
+TPG_PARAM_ATTR(FirstBurstLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DefaultTime2Wait);
+TPG_PARAM_ATTR(DefaultTime2Wait, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DefaultTime2Retain);
+TPG_PARAM_ATTR(DefaultTime2Retain, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxOutstandingR2T);
+TPG_PARAM_ATTR(MaxOutstandingR2T, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataPDUInOrder);
+TPG_PARAM_ATTR(DataPDUInOrder, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataSequenceInOrder);
+TPG_PARAM_ATTR(DataSequenceInOrder, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(ErrorRecoveryLevel);
+TPG_PARAM_ATTR(ErrorRecoveryLevel, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(IFMarker);
+TPG_PARAM_ATTR(IFMarker, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(OFMarker);
+TPG_PARAM_ATTR(OFMarker, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(IFMarkInt);
+TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(OFMarkInt);
+TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_param_attrs[] = {
+	&iscsi_tpg_param_AuthMethod.attr,
+	&iscsi_tpg_param_HeaderDigest.attr,
+	&iscsi_tpg_param_DataDigest.attr,
+	&iscsi_tpg_param_MaxConnections.attr,
+	&iscsi_tpg_param_TargetAlias.attr,
+	&iscsi_tpg_param_InitialR2T.attr,
+	&iscsi_tpg_param_ImmediateData.attr,
+	&iscsi_tpg_param_MaxRecvDataSegmentLength.attr,
+	&iscsi_tpg_param_MaxBurstLength.attr,
+	&iscsi_tpg_param_FirstBurstLength.attr,
+	&iscsi_tpg_param_DefaultTime2Wait.attr,
+	&iscsi_tpg_param_DefaultTime2Retain.attr,
+	&iscsi_tpg_param_MaxOutstandingR2T.attr,
+	&iscsi_tpg_param_DataPDUInOrder.attr,
+	&iscsi_tpg_param_DataSequenceInOrder.attr,
+	&iscsi_tpg_param_ErrorRecoveryLevel.attr,
+	&iscsi_tpg_param_IFMarker.attr,
+	&iscsi_tpg_param_OFMarker.attr,
+	&iscsi_tpg_param_IFMarkInt.attr,
+	&iscsi_tpg_param_OFMarkInt.attr,
+	NULL,
+};
+
+/* End items for lio_target_tpg_param_cit */
+
+/* Start items for lio_target_tpg_cit */
+
+static ssize_t lio_target_tpg_show_enable(
+	struct se_portal_group *se_tpg,
+	char *page)
+{
+	struct iscsi_portal_group *tpg = container_of(se_tpg,
+			struct iscsi_portal_group, tpg_se_tpg);
+	ssize_t len;
+
+	spin_lock(&tpg->tpg_state_lock);
+	len = sprintf(page, "%d\n",
+			(tpg->tpg_state == TPG_STATE_ACTIVE) ? 1 : 0);
+	spin_unlock(&tpg->tpg_state_lock);
+
+	return len;
+}
+
+static ssize_t lio_target_tpg_store_enable(
+	struct se_portal_group *se_tpg,
+	const char *page,
+	size_t count)
+{
+	struct iscsi_portal_group *tpg = container_of(se_tpg,
+			struct iscsi_portal_group, tpg_se_tpg);
+	char *endptr;
+	u32 op;
+	int ret = 0;
+
+	op = simple_strtoul(page, &endptr, 0);
+	if ((op != 1) && (op != 0)) {
+		pr_err("Illegal value for tpg_enable: %u\n", op);
+		return -EINVAL;
+	}
+
+	ret = iscsit_get_tpg(tpg);
+	if (ret < 0)
+		return -EINVAL;
+
+	if (op) {
+		ret = iscsit_tpg_enable_portal_group(tpg);
+		if (ret < 0)
+			goto out;
+	} else {
+		/*
+		 * iscsit_tpg_disable_portal_group() assumes force=1
+		 */
+		ret = iscsit_tpg_disable_portal_group(tpg, 1);
+		if (ret < 0)
+			goto out;
+	}
+
+	iscsit_put_tpg(tpg);
+	return count;
+out:
+	iscsit_put_tpg(tpg);
+	return -EINVAL;
+}
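+
+/*
+ * Illustrative usage (path hedged): a TPG is brought online or taken
+ * offline by writing 1 or 0 to its enable attribute, e.g.:
+ *
+ *   echo 1 > /sys/kernel/config/target/iscsi/$IQN/tpgt_1/enable
+ *
+ * which lands in lio_target_tpg_store_enable() above and calls
+ * iscsit_tpg_enable_portal_group() or iscsit_tpg_disable_portal_group().
+ */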
+
+TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_attrs[] = {
+	&lio_target_tpg_enable.attr,
+	NULL,
+};
+
+/* End items for lio_target_tpg_cit */
+
+/* Start items for lio_target_tiqn_cit */
+
+struct se_portal_group *lio_target_tiqn_addtpg(
+	struct se_wwn *wwn,
+	struct config_group *group,
+	const char *name)
+{
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tiqn *tiqn;
+	char *tpgt_str, *end_ptr;
+	int ret = 0;
+	unsigned short int tpgt;
+
+	tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+	/*
+	 * Only tpgt_# directory groups can be created below
+	 * target/iscsi/iqn.superturodiskarry/
+	 */
+	tpgt_str = strstr(name, "tpgt_");
+	if (!tpgt_str) {
+		pr_err("Unable to locate \"tpgt_#\" directory"
+				" group\n");
+		return NULL;
+	}
+	tpgt_str += 5; /* Skip ahead of "tpgt_" */
+	tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+
+	tpg = iscsit_alloc_portal_group(tiqn, tpgt);
+	if (!tpg)
+		return NULL;
+
+	ret = core_tpg_register(
+			&lio_target_fabric_configfs->tf_ops,
+			wwn, &tpg->tpg_se_tpg, (void *)tpg,
+			TRANSPORT_TPG_TYPE_NORMAL);
+	if (ret < 0)
+		return NULL;
+
+	ret = iscsit_tpg_add_portal_group(tiqn, tpg);
+	if (ret != 0)
+		goto out;
+
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated TPG: %s\n",
+			name);
+	return &tpg->tpg_se_tpg;
+out:
+	core_tpg_deregister(&tpg->tpg_se_tpg);
+	kfree(tpg);
+	return NULL;
+}
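+
+/*
+ * For example, a mkdir of a group named "tpgt_1" below an IQN
+ * directory reaches the callback above with name == "tpgt_1"; the
+ * "tpgt_" prefix is skipped and simple_strtoul() parses the trailing
+ * "1" into tpgt.
+ */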
+
+void lio_target_tiqn_deltpg(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tiqn *tiqn;
+
+	tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+	tiqn = tpg->tpg_tiqn;
+	/*
+	 * iscsit_tpg_del_portal_group() assumes force=1
+	 */
+	pr_debug("LIO_Target_ConfigFS: DEREGISTER -> Releasing TPG\n");
+	iscsit_tpg_del_portal_group(tiqn, tpg, 1);
+}
+
+/* End items for lio_target_tiqn_cit */
+
+/* Start LIO-Target TIQN struct config_item lio_target_cit */
+
+static ssize_t lio_target_wwn_show_attr_lio_version(
+	struct target_fabric_configfs *tf,
+	char *page)
+{
+	return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n");
+}
+
+TF_WWN_ATTR_RO(lio_target, lio_version);
+
+static struct configfs_attribute *lio_target_wwn_attrs[] = {
+	&lio_target_wwn_lio_version.attr,
+	NULL,
+};
+
+struct se_wwn *lio_target_call_coreaddtiqn(
+	struct target_fabric_configfs *tf,
+	struct config_group *group,
+	const char *name)
+{
+	struct config_group *stats_cg;
+	struct iscsi_tiqn *tiqn;
+
+	tiqn = iscsit_add_tiqn((unsigned char *)name);
+	if (IS_ERR(tiqn))
+		return ERR_PTR(PTR_ERR(tiqn));
+	/*
+	 * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
+	 */
+	stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+
+	stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 6,
+				GFP_KERNEL);
+	if (!stats_cg->default_groups) {
+		pr_err("Unable to allocate memory for"
+				" stats_cg->default_groups\n");
+		iscsit_del_tiqn(tiqn);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group;
+	stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group;
+	stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group;
+	stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group;
+	stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group;
+	stats_cg->default_groups[5] = NULL;
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group,
+			"iscsi_instance", &iscsi_stat_instance_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group,
+			"iscsi_sess_err", &iscsi_stat_sess_err_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group,
+			"iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group,
+			"iscsi_login_stats", &iscsi_stat_login_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group,
+			"iscsi_logout_stats", &iscsi_stat_logout_cit);
+
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:"
+			" %s\n", name);
+	return &tiqn->tiqn_wwn;
+}
+
+void lio_target_call_coredeltiqn(
+	struct se_wwn *wwn)
+{
+	struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+	struct config_item *df_item;
+	struct config_group *stats_cg;
+	int i;
+
+	stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+	for (i = 0; stats_cg->default_groups[i]; i++) {
+		df_item = &stats_cg->default_groups[i]->cg_item;
+		stats_cg->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(stats_cg->default_groups);
+
+	pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n",
+			tiqn->tiqn);
+	iscsit_del_tiqn(tiqn);
+}
+
+/* End LIO-Target TIQN struct config_item lio_target_cit */
+
+/* Start lio_target_discovery_auth_cit */
+
+#define DEF_DISC_AUTH_STR(name, flags)					\
+	__DEF_NACL_AUTH_STR(disc, name, flags)				\
+static ssize_t iscsi_disc_show_##name(					\
+	struct target_fabric_configfs *tf,				\
+	char *page)							\
+{									\
+	return __iscsi_disc_show_##name(&iscsit_global->discovery_acl,	\
+		page);							\
+}									\
+static ssize_t iscsi_disc_store_##name(					\
+	struct target_fabric_configfs *tf,				\
+	const char *page,						\
+	size_t count)							\
+{									\
+	return __iscsi_disc_store_##name(&iscsit_global->discovery_acl,	\
+		page, count);						\
+}
+
+#define DEF_DISC_AUTH_INT(name)						\
+	__DEF_NACL_AUTH_INT(disc, name)					\
+static ssize_t iscsi_disc_show_##name(					\
+	struct target_fabric_configfs *tf,				\
+	char *page)							\
+{									\
+	return __iscsi_disc_show_##name(&iscsit_global->discovery_acl,	\
+			page);						\
+}
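+
+/*
+ * The discovery auth attributes reuse the NodeACL auth accessors
+ * defined earlier, but always operate on the single global
+ * iscsit_global->discovery_acl; a hypothetical configfs location would
+ * be something like .../iscsi/discovery_auth/userid rather than a
+ * per-initiator ACL directory.
+ */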
+
+#define DISC_AUTH_ATTR(_name, _mode) TF_DISC_ATTR(iscsi, _name, _mode)
+#define DISC_AUTH_ATTR_RO(_name) TF_DISC_ATTR_RO(iscsi, _name)
+
+/*
+ * One-way authentication userid
+ */
+DEF_DISC_AUTH_STR(userid, NAF_USERID_SET);
+DISC_AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_DISC_AUTH_STR(password, NAF_PASSWORD_SET);
+DISC_AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_DISC_AUTH_INT(authenticate_target);
+DISC_AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication userid
+ */
+DEF_DISC_AUTH_STR(userid_mutual, NAF_USERID_IN_SET);
+DISC_AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR);
+/*
+ * Mutual authentication password
+ */
+DEF_DISC_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET);
+DISC_AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR);
+
+/*
+ * enforce_discovery_auth
+ */
+static ssize_t iscsi_disc_show_enforce_discovery_auth(
+	struct target_fabric_configfs *tf,
+	char *page)
+{
+	struct iscsi_node_auth *discovery_auth = &iscsit_global->discovery_acl.node_auth;
+
+	return sprintf(page, "%d\n", discovery_auth->enforce_discovery_auth);
+}
+
+static ssize_t iscsi_disc_store_enforce_discovery_auth(
+	struct target_fabric_configfs *tf,
+	const char *page,
+	size_t count)
+{
+	struct iscsi_param *param;
+	struct iscsi_portal_group *discovery_tpg = iscsit_global->discovery_tpg;
+	char *endptr;
+	u32 op;
+
+	op = simple_strtoul(page, &endptr, 0);
+	if ((op != 1) && (op != 0)) {
+		pr_err("Illegal value for enforce_discovery_auth:"
+				" %u\n", op);
+		return -EINVAL;
+	}
+
+	if (!discovery_tpg) {
+		pr_err("iscsit_global->discovery_tpg is NULL\n");
+		return -EINVAL;
+	}
+
+	param = iscsi_find_param_from_key(AUTHMETHOD,
+				discovery_tpg->param_list);
+	if (!param)
+		return -EINVAL;
+
+	if (op) {
+		/*
+		 * Reset the AuthMethod key to CHAP.
+		 */
+		if (iscsi_update_param_value(param, CHAP) < 0)
+			return -EINVAL;
+
+		discovery_tpg->tpg_attrib.authentication = 1;
+		iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 1;
+		pr_debug("LIO-CORE[0] Successfully enabled"
+			" authentication enforcement for iSCSI"
+			" Discovery TPG\n");
+	} else {
+		/*
+		 * Reset the AuthMethod key to CHAP,None
+		 */
+		if (iscsi_update_param_value(param, "CHAP,None") < 0)
+			return -EINVAL;
+
+		discovery_tpg->tpg_attrib.authentication = 0;
+		iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 0;
+		pr_debug("LIO-CORE[0] Successfully disabled"
+			" authentication enforcement for iSCSI"
+			" Discovery TPG\n");
+	}
+
+	return count;
+}
+
+DISC_AUTH_ATTR(enforce_discovery_auth, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_discovery_auth_attrs[] = {
+	&iscsi_disc_userid.attr,
+	&iscsi_disc_password.attr,
+	&iscsi_disc_authenticate_target.attr,
+	&iscsi_disc_userid_mutual.attr,
+	&iscsi_disc_password_mutual.attr,
+	&iscsi_disc_enforce_discovery_auth.attr,
+	NULL,
+};
+
+/* End lio_target_discovery_auth_cit */
+
+/* Start functions for target_core_fabric_ops */
+
+static char *iscsi_get_fabric_name(void)
+{
+	return "iSCSI";
+}
+
+static u32 iscsi_get_task_tag(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	return cmd->init_task_tag;
+}
+
+static int iscsi_get_cmd_state(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	return cmd->i_state;
+}
+
+static int iscsi_is_state_remove(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	return (cmd->i_state == ISTATE_REMOVE);
+}
+
+static int lio_sess_logged_in(struct se_session *se_sess)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+	int ret;
+	/*
+	 * Called with spin_lock_bh(&tpg_lock); and
+	 * spin_lock(&se_tpg->session_lock); held.
+	 */
+	spin_lock(&sess->conn_lock);
+	ret = (sess->session_state != TARG_SESS_STATE_LOGGED_IN);
+	spin_unlock(&sess->conn_lock);
+
+	return ret;
+}
+
+static u32 lio_sess_get_index(struct se_session *se_sess)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+	return sess->session_index;
+}
+
+static u32 lio_sess_get_initiator_sid(
+	struct se_session *se_sess,
+	unsigned char *buf,
+	u32 size)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+	/*
+	 * iSCSI Initiator Session Identifier from RFC-3720.
+	 */
+	return snprintf(buf, size, "%02x%02x%02x%02x%02x%02x",
+		sess->isid[0], sess->isid[1], sess->isid[2],
+		sess->isid[3], sess->isid[4], sess->isid[5]);
+}
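+
+/*
+ * Example (ISID bytes hypothetical): an isid[] of
+ * { 0x80, 0x23, 0x45, 0x00, 0x00, 0x01 } is rendered by the snprintf()
+ * above as the 12-character string "802345000001".
+ */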
+
+static int lio_queue_data_in(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	cmd->i_state = ISTATE_SEND_DATAIN;
+	iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+	return 0;
+}
+
+static int lio_write_pending(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	if (!cmd->immediate_data && !cmd->unsolicited_data)
+		return iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 1);
+
+	return 0;
+}
+
+static int lio_write_pending_status(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+	int ret;
+
+	spin_lock_bh(&cmd->istate_lock);
+	ret = !(cmd->cmd_flags & ICF_GOT_LAST_DATAOUT);
+	spin_unlock_bh(&cmd->istate_lock);
+
+	return ret;
+}
+
+static int lio_queue_status(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	cmd->i_state = ISTATE_SEND_STATUS;
+	iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+	return 0;
+}
+
+static u16 lio_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_length)
+{
+	unsigned char *buffer = se_cmd->sense_buffer;
+	/*
+	 * From RFC-3720 10.4.7.  Data Segment - Sense and Response Data Segment
+	 * 16-bit SenseLength.
+	 */
+	buffer[0] = ((sense_length >> 8) & 0xff);
+	buffer[1] = (sense_length & 0xff);
+	/*
+	 * Return two byte offset into allocated sense_buffer.
+	 */
+	return 2;
+}
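+
+/*
+ * Worked example: for sense_length == 18 the two-byte big-endian
+ * SenseLength header becomes buffer[0] = 0x00 and buffer[1] = 0x12,
+ * and the returned offset of 2 tells the core to place the actual
+ * sense data starting at &buffer[2], matching the RFC-3720 layout.
+ */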
+
+static u16 lio_get_fabric_sense_len(void)
+{
+	/*
+	 * Return two byte offset into allocated sense_buffer.
+	 */
+	return 2;
+}
+
+static int lio_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	cmd->i_state = ISTATE_SEND_TASKMGTRSP;
+	iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+	return 0;
+}
+
+static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return &tpg->tpg_tiqn->tiqn[0];
+}
+
+static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return tpg->tpgt;
+}
+
+static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
+}
+
+static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls;
+}
+
+static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls;
+}
+
+static int lio_tpg_check_demo_mode_write_protect(
+	struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect;
+}
+
+static int lio_tpg_check_prod_mode_write_protect(
+	struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect;
+}
+
+static void lio_tpg_release_fabric_acl(
+	struct se_portal_group *se_tpg,
+	struct se_node_acl *se_acl)
+{
+	struct iscsi_node_acl *acl = container_of(se_acl,
+				struct iscsi_node_acl, se_node_acl);
+	kfree(acl);
+}
+
+/*
+ * Called with spin_lock_bh(struct se_portal_group->session_lock) held.
+ *
+ * Also, this function calls iscsit_inc_session_usage_count() on the
+ * struct iscsi_session in question.
+ */
+static int lio_tpg_shutdown_session(struct se_session *se_sess)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+	spin_lock(&sess->conn_lock);
+	if (atomic_read(&sess->session_fall_back_to_erl0) ||
+	    atomic_read(&sess->session_logout) ||
+	    (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+		spin_unlock(&sess->conn_lock);
+		return 0;
+	}
+	atomic_set(&sess->session_reinstatement, 1);
+	spin_unlock(&sess->conn_lock);
+
+	iscsit_inc_session_usage_count(sess);
+	iscsit_stop_time2retain_timer(sess);
+
+	return 1;
+}
+
+/*
+ * Calls iscsit_dec_session_usage_count() as inverse of
+ * lio_tpg_shutdown_session()
+ */
+static void lio_tpg_close_session(struct se_session *se_sess)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+	/*
+	 * If the iSCSI Session for the iSCSI Initiator Node exists,
+	 * forcefully shutdown the iSCSI NEXUS.
+	 */
+	iscsit_stop_session(sess, 1, 1);
+	iscsit_dec_session_usage_count(sess);
+	iscsit_close_session(sess);
+}
+
+static void lio_tpg_stop_session(
+	struct se_session *se_sess,
+	int sess_sleep,
+	int conn_sleep)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+	iscsit_stop_session(sess, sess_sleep, conn_sleep);
+}
+
+static void lio_tpg_fall_back_to_erl0(struct se_session *se_sess)
+{
+	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+	iscsit_fall_back_to_erl0(sess);
+}
+
+static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return tpg->tpg_tiqn->tiqn_index;
+}
+
+static void lio_set_default_node_attributes(struct se_node_acl *se_acl)
+{
+	struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl,
+				se_node_acl);
+
+	ISCSI_NODE_ATTRIB(acl)->nacl = acl;
+	iscsit_set_default_node_attribues(acl);
+}
+
+static void lio_release_cmd(struct se_cmd *se_cmd)
+{
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	iscsit_release_cmd(cmd);
+}
+
+/* End functions for target_core_fabric_ops */
+
+int iscsi_target_register_configfs(void)
+{
+	struct target_fabric_configfs *fabric;
+	int ret;
+
+	lio_target_fabric_configfs = NULL;
+	fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi");
+	if (IS_ERR(fabric)) {
+		pr_err("target_fabric_configfs_init() for"
+				" LIO-Target failed!\n");
+		return PTR_ERR(fabric);
+	}
+	/*
+	 * Setup the fabric API of function pointers used by target_core_mod.
+	 */
+	fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name;
+	fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident;
+	fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn;
+	fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag;
+	fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth;
+	fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id;
+	fabric->tf_ops.tpg_get_pr_transport_id_len =
+				&iscsi_get_pr_transport_id_len;
+	fabric->tf_ops.tpg_parse_pr_out_transport_id =
+				&iscsi_parse_pr_out_transport_id;
+	fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode;
+	fabric->tf_ops.tpg_check_demo_mode_cache =
+				&lio_tpg_check_demo_mode_cache;
+	fabric->tf_ops.tpg_check_demo_mode_write_protect =
+				&lio_tpg_check_demo_mode_write_protect;
+	fabric->tf_ops.tpg_check_prod_mode_write_protect =
+				&lio_tpg_check_prod_mode_write_protect;
+	fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl;
+	fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl;
+	fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index;
+	fabric->tf_ops.release_cmd = &lio_release_cmd;
+	fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session;
+	fabric->tf_ops.close_session = &lio_tpg_close_session;
+	fabric->tf_ops.stop_session = &lio_tpg_stop_session;
+	fabric->tf_ops.fall_back_to_erl0 = &lio_tpg_fall_back_to_erl0;
+	fabric->tf_ops.sess_logged_in = &lio_sess_logged_in;
+	fabric->tf_ops.sess_get_index = &lio_sess_get_index;
+	fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid;
+	fabric->tf_ops.write_pending = &lio_write_pending;
+	fabric->tf_ops.write_pending_status = &lio_write_pending_status;
+	fabric->tf_ops.set_default_node_attributes =
+				&lio_set_default_node_attributes;
+	fabric->tf_ops.get_task_tag = &iscsi_get_task_tag;
+	fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state;
+	fabric->tf_ops.queue_data_in = &lio_queue_data_in;
+	fabric->tf_ops.queue_status = &lio_queue_status;
+	fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp;
+	fabric->tf_ops.set_fabric_sense_len = &lio_set_fabric_sense_len;
+	fabric->tf_ops.get_fabric_sense_len = &lio_get_fabric_sense_len;
+	fabric->tf_ops.is_state_remove = &iscsi_is_state_remove;
+	/*
+	 * Setup function pointers for generic logic in target_core_fabric_configfs.c
+	 */
+	fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn;
+	fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn;
+	fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg;
+	fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg;
+	fabric->tf_ops.fabric_post_link	= NULL;
+	fabric->tf_ops.fabric_pre_unlink = NULL;
+	fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg;
+	fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg;
+	fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl;
+	fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl;
+	/*
+	 * Setup default attribute lists for various fabric->tf_cit_tmpl
+	 * struct config_item_type's
+	 */
+	TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs;
+	TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs;
+
+	ret = target_fabric_configfs_register(fabric);
+	if (ret < 0) {
+		pr_err("target_fabric_configfs_register() for"
+				" LIO-Target failed!\n");
+		target_fabric_configfs_free(fabric);
+		return ret;
+	}
+
+	lio_target_fabric_configfs = fabric;
+	pr_debug("LIO_TARGET[0] - Set fabric ->"
+			" lio_target_fabric_configfs\n");
+	return 0;
+}
+
+
+void iscsi_target_deregister_configfs(void)
+{
+	if (!lio_target_fabric_configfs)
+		return;
+	/*
+	 * Shutdown discovery sessions and disable discovery TPG
+	 */
+	if (iscsit_global->discovery_tpg)
+		iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
+
+	target_fabric_configfs_deregister(lio_target_fabric_configfs);
+	lio_target_fabric_configfs = NULL;
+	pr_debug("LIO_TARGET[0] - Cleared"
+				" lio_target_fabric_configfs\n");
+}
diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h
new file mode 100644
index 0000000..8cd5a63
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_configfs.h
@@ -0,0 +1,7 @@
+#ifndef ISCSI_TARGET_CONFIGFS_H
+#define ISCSI_TARGET_CONFIGFS_H
+
+extern int iscsi_target_register_configfs(void);
+extern void iscsi_target_deregister_configfs(void);
+
+#endif /* ISCSI_TARGET_CONFIGFS_H */
diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
new file mode 100644
index 0000000..470ed55
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_core.h
@@ -0,0 +1,859 @@
+#ifndef ISCSI_TARGET_CORE_H
+#define ISCSI_TARGET_CORE_H
+
+#include <linux/in.h>
+#include <linux/configfs.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+
+#define ISCSIT_VERSION			"v4.1.0-rc1"
+#define ISCSI_MAX_DATASN_MISSING_COUNT	16
+#define ISCSI_TX_THREAD_TCP_TIMEOUT	2
+#define ISCSI_RX_THREAD_TCP_TIMEOUT	2
+#define SECONDS_FOR_ASYNC_LOGOUT	10
+#define SECONDS_FOR_ASYNC_TEXT		10
+#define SECONDS_FOR_LOGOUT_COMP		15
+#define WHITE_SPACE			" \t\v\f\n\r"
+
+/* struct iscsi_node_attrib sanity values */
+#define NA_DATAOUT_TIMEOUT		3
+#define NA_DATAOUT_TIMEOUT_MAX		60
+#define NA_DATAOUT_TIMEOUT_MIX		2
+#define NA_DATAOUT_TIMEOUT_RETRIES	5
+#define NA_DATAOUT_TIMEOUT_RETRIES_MAX	15
+#define NA_DATAOUT_TIMEOUT_RETRIES_MIN	1
+#define NA_NOPIN_TIMEOUT		5
+#define NA_NOPIN_TIMEOUT_MAX		60
+#define NA_NOPIN_TIMEOUT_MIN		3
+#define NA_NOPIN_RESPONSE_TIMEOUT	5
+#define NA_NOPIN_RESPONSE_TIMEOUT_MAX	60
+#define NA_NOPIN_RESPONSE_TIMEOUT_MIN	3
+#define NA_RANDOM_DATAIN_PDU_OFFSETS	0
+#define NA_RANDOM_DATAIN_SEQ_OFFSETS	0
+#define NA_RANDOM_R2T_OFFSETS		0
+#define NA_DEFAULT_ERL			0
+#define NA_DEFAULT_ERL_MAX		2
+#define NA_DEFAULT_ERL_MIN		0
+
+/* struct iscsi_tpg_attrib sanity values */
+#define TA_AUTHENTICATION		1
+#define TA_LOGIN_TIMEOUT		15
+#define TA_LOGIN_TIMEOUT_MAX		30
+#define TA_LOGIN_TIMEOUT_MIN		5
+#define TA_NETIF_TIMEOUT		2
+#define TA_NETIF_TIMEOUT_MAX		15
+#define TA_NETIF_TIMEOUT_MIN		2
+#define TA_GENERATE_NODE_ACLS		0
+#define TA_DEFAULT_CMDSN_DEPTH		16
+#define TA_DEFAULT_CMDSN_DEPTH_MAX	512
+#define TA_DEFAULT_CMDSN_DEPTH_MIN	1
+#define TA_CACHE_DYNAMIC_ACLS		0
+/* Enabled by default in demo mode (generic_node_acls=1) */
+#define TA_DEMO_MODE_WRITE_PROTECT	1
+/* Disabled by default in production mode w/ explict ACLs */
+#define TA_PROD_MODE_WRITE_PROTECT	0
+#define TA_CACHE_CORE_NPS		0
+
+enum tpg_np_network_transport_table {
+	ISCSI_TCP				= 0,
+	ISCSI_SCTP_TCP				= 1,
+	ISCSI_SCTP_UDP				= 2,
+	ISCSI_IWARP_TCP				= 3,
+	ISCSI_IWARP_SCTP			= 4,
+	ISCSI_INFINIBAND			= 5,
+};
+
+/* RFC-3720 7.1.4  Standard Connection State Diagram for a Target */
+enum target_conn_state_table {
+	TARG_CONN_STATE_FREE			= 0x1,
+	TARG_CONN_STATE_XPT_UP			= 0x3,
+	TARG_CONN_STATE_IN_LOGIN		= 0x4,
+	TARG_CONN_STATE_LOGGED_IN		= 0x5,
+	TARG_CONN_STATE_IN_LOGOUT		= 0x6,
+	TARG_CONN_STATE_LOGOUT_REQUESTED	= 0x7,
+	TARG_CONN_STATE_CLEANUP_WAIT		= 0x8,
+};
+
+/* RFC-3720 7.3.2  Session State Diagram for a Target */
+enum target_sess_state_table {
+	TARG_SESS_STATE_FREE			= 0x1,
+	TARG_SESS_STATE_ACTIVE			= 0x2,
+	TARG_SESS_STATE_LOGGED_IN		= 0x3,
+	TARG_SESS_STATE_FAILED			= 0x4,
+	TARG_SESS_STATE_IN_CONTINUE		= 0x5,
+};
+
+/* struct iscsi_data_count->type */
+enum data_count_type {
+	ISCSI_RX_DATA	= 1,
+	ISCSI_TX_DATA	= 2,
+};
+
+/* struct iscsi_datain_req->dr_complete */
+enum datain_req_comp_table {
+	DATAIN_COMPLETE_NORMAL			= 1,
+	DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY = 2,
+	DATAIN_COMPLETE_CONNECTION_RECOVERY	= 3,
+};
+
+/* struct iscsi_datain_req->recovery */
+enum datain_req_rec_table {
+	DATAIN_WITHIN_COMMAND_RECOVERY		= 1,
+	DATAIN_CONNECTION_RECOVERY		= 2,
+};
+
+/* struct iscsi_portal_group->state */
+enum tpg_state_table {
+	TPG_STATE_FREE				= 0,
+	TPG_STATE_ACTIVE			= 1,
+	TPG_STATE_INACTIVE			= 2,
+	TPG_STATE_COLD_RESET			= 3,
+};
+
+/* struct iscsi_tiqn->tiqn_state */
+enum tiqn_state_table {
+	TIQN_STATE_ACTIVE			= 1,
+	TIQN_STATE_SHUTDOWN			= 2,
+};
+
+/* struct iscsi_cmd->cmd_flags */
+enum cmd_flags_table {
+	ICF_GOT_LAST_DATAOUT			= 0x00000001,
+	ICF_GOT_DATACK_SNACK			= 0x00000002,
+	ICF_NON_IMMEDIATE_UNSOLICITED_DATA	= 0x00000004,
+	ICF_SENT_LAST_R2T			= 0x00000008,
+	ICF_WITHIN_COMMAND_RECOVERY		= 0x00000010,
+	ICF_CONTIG_MEMORY			= 0x00000020,
+	ICF_ATTACHED_TO_RQUEUE			= 0x00000040,
+	ICF_OOO_CMDSN				= 0x00000080,
+	ICF_REJECT_FAIL_CONN			= 0x00000100,
+};
+
+/* struct iscsi_cmd->i_state */
+enum cmd_i_state_table {
+	ISTATE_NO_STATE			= 0,
+	ISTATE_NEW_CMD			= 1,
+	ISTATE_DEFERRED_CMD		= 2,
+	ISTATE_UNSOLICITED_DATA		= 3,
+	ISTATE_RECEIVE_DATAOUT		= 4,
+	ISTATE_RECEIVE_DATAOUT_RECOVERY	= 5,
+	ISTATE_RECEIVED_LAST_DATAOUT	= 6,
+	ISTATE_WITHIN_DATAOUT_RECOVERY	= 7,
+	ISTATE_IN_CONNECTION_RECOVERY	= 8,
+	ISTATE_RECEIVED_TASKMGT		= 9,
+	ISTATE_SEND_ASYNCMSG		= 10,
+	ISTATE_SENT_ASYNCMSG		= 11,
+	ISTATE_SEND_DATAIN		= 12,
+	ISTATE_SEND_LAST_DATAIN		= 13,
+	ISTATE_SENT_LAST_DATAIN		= 14,
+	ISTATE_SEND_LOGOUTRSP		= 15,
+	ISTATE_SENT_LOGOUTRSP		= 16,
+	ISTATE_SEND_NOPIN		= 17,
+	ISTATE_SENT_NOPIN		= 18,
+	ISTATE_SEND_REJECT		= 19,
+	ISTATE_SENT_REJECT		= 20,
+	ISTATE_SEND_R2T			= 21,
+	ISTATE_SENT_R2T			= 22,
+	ISTATE_SEND_R2T_RECOVERY	= 23,
+	ISTATE_SENT_R2T_RECOVERY	= 24,
+	ISTATE_SEND_LAST_R2T		= 25,
+	ISTATE_SENT_LAST_R2T		= 26,
+	ISTATE_SEND_LAST_R2T_RECOVERY	= 27,
+	ISTATE_SENT_LAST_R2T_RECOVERY	= 28,
+	ISTATE_SEND_STATUS		= 29,
+	ISTATE_SEND_STATUS_BROKEN_PC	= 30,
+	ISTATE_SENT_STATUS		= 31,
+	ISTATE_SEND_STATUS_RECOVERY	= 32,
+	ISTATE_SENT_STATUS_RECOVERY	= 33,
+	ISTATE_SEND_TASKMGTRSP		= 34,
+	ISTATE_SENT_TASKMGTRSP		= 35,
+	ISTATE_SEND_TEXTRSP		= 36,
+	ISTATE_SENT_TEXTRSP		= 37,
+	ISTATE_SEND_NOPIN_WANT_RESPONSE	= 38,
+	ISTATE_SENT_NOPIN_WANT_RESPONSE	= 39,
+	ISTATE_SEND_NOPIN_NO_RESPONSE	= 40,
+	ISTATE_REMOVE			= 41,
+	ISTATE_FREE			= 42,
+};
+
+/* Used for iscsi_recover_cmdsn() return values */
+enum recover_cmdsn_ret_table {
+	CMDSN_ERROR_CANNOT_RECOVER	= -1,
+	CMDSN_NORMAL_OPERATION		= 0,
+	CMDSN_LOWER_THAN_EXP		= 1,
+	CMDSN_HIGHER_THAN_EXP		= 2,
+};
+
+/* Used for iscsi_handle_immediate_data() return values */
+enum immedate_data_ret_table {
+	IMMEDIATE_DATA_CANNOT_RECOVER	= -1,
+	IMMEDIATE_DATA_NORMAL_OPERATION = 0,
+	IMMEDIATE_DATA_ERL1_CRC_FAILURE = 1,
+};
+
+/* Used for iscsi_decide_dataout_action() return values */
+enum dataout_action_ret_table {
+	DATAOUT_CANNOT_RECOVER		= -1,
+	DATAOUT_NORMAL			= 0,
+	DATAOUT_SEND_R2T		= 1,
+	DATAOUT_SEND_TO_TRANSPORT	= 2,
+	DATAOUT_WITHIN_COMMAND_RECOVERY = 3,
+};
+
+/* Used for struct iscsi_node_auth->naf_flags */
+enum naf_flags_table {
+	NAF_USERID_SET			= 0x01,
+	NAF_PASSWORD_SET		= 0x02,
+	NAF_USERID_IN_SET		= 0x04,
+	NAF_PASSWORD_IN_SET		= 0x08,
+};
+
+/* Used by various struct timer_list to manage iSCSI specific state */
+enum iscsi_timer_flags_table {
+	ISCSI_TF_RUNNING		= 0x01,
+	ISCSI_TF_STOP			= 0x02,
+	ISCSI_TF_EXPIRED		= 0x04,
+};
+
+/* Used for struct iscsi_np->np_flags */
+enum np_flags_table {
+	NPF_IP_NETWORK		= 0x00,
+	NPF_SCTP_STRUCT_FILE	= 0x01 /* Bugfix */
+};
+
+/* Used for struct iscsi_np->np_thread_state */
+enum np_thread_state_table {
+	ISCSI_NP_THREAD_ACTIVE		= 1,
+	ISCSI_NP_THREAD_INACTIVE	= 2,
+	ISCSI_NP_THREAD_RESET		= 3,
+	ISCSI_NP_THREAD_SHUTDOWN	= 4,
+	ISCSI_NP_THREAD_EXIT		= 5,
+};
+
+struct iscsi_conn_ops {
+	u8	HeaderDigest;			/* [0,1] == [None,CRC32C] */
+	u8	DataDigest;			/* [0,1] == [None,CRC32C] */
+	u32	MaxRecvDataSegmentLength;	/* [512..2**24-1] */
+	u8	OFMarker;			/* [0,1] == [No,Yes] */
+	u8	IFMarker;			/* [0,1] == [No,Yes] */
+	u32	OFMarkInt;			/* [1..65535] */
+	u32	IFMarkInt;			/* [1..65535] */
+};
+
+struct iscsi_sess_ops {
+	char	InitiatorName[224];
+	char	InitiatorAlias[256];
+	char	TargetName[224];
+	char	TargetAlias[256];
+	char	TargetAddress[256];
+	u16	TargetPortalGroupTag;		/* [0..65535] */
+	u16	MaxConnections;			/* [1..65535] */
+	u8	InitialR2T;			/* [0,1] == [No,Yes] */
+	u8	ImmediateData;			/* [0,1] == [No,Yes] */
+	u32	MaxBurstLength;			/* [512..2**24-1] */
+	u32	FirstBurstLength;		/* [512..2**24-1] */
+	u16	DefaultTime2Wait;		/* [0..3600] */
+	u16	DefaultTime2Retain;		/* [0..3600] */
+	u16	MaxOutstandingR2T;		/* [1..65535] */
+	u8	DataPDUInOrder;			/* [0,1] == [No,Yes] */
+	u8	DataSequenceInOrder;		/* [0,1] == [No,Yes] */
+	u8	ErrorRecoveryLevel;		/* [0..2] */
+	u8	SessionType;			/* [0,1] == [Normal,Discovery] */
+};
+
+struct iscsi_queue_req {
+	int			state;
+	struct iscsi_cmd	*cmd;
+	struct list_head	qr_list;
+};
+
+struct iscsi_data_count {
+	int			data_length;
+	int			sync_and_steering;
+	enum data_count_type	type;
+	u32			iov_count;
+	u32			ss_iov_count;
+	u32			ss_marker_count;
+	struct kvec		*iov;
+};
+
+struct iscsi_param_list {
+	struct list_head	param_list;
+	struct list_head	extra_response_list;
+};
+
+struct iscsi_datain_req {
+	enum datain_req_comp_table dr_complete;
+	int			generate_recovery_values;
+	enum datain_req_rec_table recovery;
+	u32			begrun;
+	u32			runlength;
+	u32			data_length;
+	u32			data_offset;
+	u32			data_offset_end;
+	u32			data_sn;
+	u32			next_burst_len;
+	u32			read_data_done;
+	u32			seq_send_order;
+	struct list_head	dr_list;
+} ____cacheline_aligned;
+
+struct iscsi_ooo_cmdsn {
+	u16			cid;
+	u32			batch_count;
+	u32			cmdsn;
+	u32			exp_cmdsn;
+	struct iscsi_cmd	*cmd;
+	struct list_head	ooo_list;
+} ____cacheline_aligned;
+
+struct iscsi_datain {
+	u8			flags;
+	u32			data_sn;
+	u32			length;
+	u32			offset;
+} ____cacheline_aligned;
+
+struct iscsi_r2t {
+	int			seq_complete;
+	int			recovery_r2t;
+	int			sent_r2t;
+	u32			r2t_sn;
+	u32			offset;
+	u32			targ_xfer_tag;
+	u32			xfer_len;
+	struct list_head	r2t_list;
+} ____cacheline_aligned;
+
+struct iscsi_cmd {
+	enum iscsi_timer_flags_table dataout_timer_flags;
+	/* DataOUT timeout retries */
+	u8			dataout_timeout_retries;
+	/* Within command recovery count */
+	u8			error_recovery_count;
+	/* iSCSI dependent state for out of order CmdSNs */
+	enum cmd_i_state_table	deferred_i_state;
+	/* iSCSI dependent state */
+	enum cmd_i_state_table	i_state;
+	/* Command is an immediate command (ISCSI_OP_IMMEDIATE set) */
+	u8			immediate_cmd;
+	/* Immediate data present */
+	u8			immediate_data;
+	/* iSCSI Opcode */
+	u8			iscsi_opcode;
+	/* iSCSI Response Code */
+	u8			iscsi_response;
+	/* Logout reason when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+	u8			logout_reason;
+	/* Logout response code when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+	u8			logout_response;
+	/* MaxCmdSN has been incremented */
+	u8			maxcmdsn_inc;
+	/* Immediate Unsolicited Dataout */
+	u8			unsolicited_data;
+	/* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */
+	u16			logout_cid;
+	/* Command flags */
+	enum cmd_flags_table	cmd_flags;
+	/* Initiator Task Tag assigned from Initiator */
+	u32			init_task_tag;
+	/* Target Transfer Tag assigned from Target */
+	u32			targ_xfer_tag;
+	/* CmdSN assigned from Initiator */
+	u32			cmd_sn;
+	/* ExpStatSN assigned from Initiator */
+	u32			exp_stat_sn;
+	/* StatSN assigned to this ITT */
+	u32			stat_sn;
+	/* DataSN Counter */
+	u32			data_sn;
+	/* R2TSN Counter */
+	u32			r2t_sn;
+	/* Last DataSN acknowledged via DataAck SNACK */
+	u32			acked_data_sn;
+	/* Used for echoing NOPOUT ping data */
+	u32			buf_ptr_size;
+	/* Used to store DataDigest */
+	u32			data_crc;
+	/* Total size in bytes associated with command */
+	u32			data_length;
+	/* Counter for MaxOutstandingR2T */
+	u32			outstanding_r2ts;
+	/* Next R2T Offset when DataSequenceInOrder=Yes */
+	u32			r2t_offset;
+	/* Iovec current and orig count for iscsi_cmd->iov_data */
+	u32			iov_data_count;
+	u32			orig_iov_data_count;
+	/* Number of miscellaneous iovecs used for IP stack calls */
+	u32			iov_misc_count;
+	/* Number of struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+	u32			pdu_count;
+	/* Next struct iscsi_pdu to send in struct iscsi_cmd->pdu_list */
+	u32			pdu_send_order;
+	/* Current struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+	u32			pdu_start;
+	u32			residual_count;
+	/* Next struct iscsi_seq to send in struct iscsi_cmd->seq_list */
+	u32			seq_send_order;
+	/* Number of struct iscsi_seq in struct iscsi_cmd->seq_list */
+	u32			seq_count;
+	/* Current struct iscsi_seq in struct iscsi_cmd->seq_list */
+	u32			seq_no;
+	/* Lowest offset in current DataOUT sequence */
+	u32			seq_start_offset;
+	/* Highest offset in current DataOUT sequence */
+	u32			seq_end_offset;
+	/* Total size in bytes received so far of READ data */
+	u32			read_data_done;
+	/* Total size in bytes received so far of WRITE data */
+	u32			write_data_done;
+	/* Counter for FirstBurstLength key */
+	u32			first_burst_len;
+	/* Counter for MaxBurstLength key */
+	u32			next_burst_len;
+	/* Transfer size used for IP stack calls */
+	u32			tx_size;
+	/* Buffer used for various purposes */
+	void			*buf_ptr;
+	/* See include/linux/dma-mapping.h */
+	enum dma_data_direction	data_direction;
+	/* iSCSI PDU Header + CRC */
+	unsigned char		pdu[ISCSI_HDR_LEN + ISCSI_CRC_LEN];
+	/* Number of times struct iscsi_cmd is present in immediate queue */
+	atomic_t		immed_queue_count;
+	atomic_t		response_queue_count;
+	atomic_t		transport_sent;
+	spinlock_t		datain_lock;
+	spinlock_t		dataout_timeout_lock;
+	/* spinlock for protecting struct iscsi_cmd->i_state */
+	spinlock_t		istate_lock;
+	/* spinlock for adding within command recovery entries */
+	spinlock_t		error_lock;
+	/* spinlock for adding R2Ts */
+	spinlock_t		r2t_lock;
+	/* DataIN List */
+	struct list_head	datain_list;
+	/* R2T List */
+	struct list_head	cmd_r2t_list;
+	struct completion	reject_comp;
+	/* Timer for DataOUT */
+	struct timer_list	dataout_timer;
+	/* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */
+	struct kvec		*iov_data;
+	/* Iovecs for miscellaneous purposes */
+#define ISCSI_MISC_IOVECS			5
+	struct kvec		iov_misc[ISCSI_MISC_IOVECS];
+	/* Array of struct iscsi_pdu used for DataPDUInOrder=No */
+	struct iscsi_pdu	*pdu_list;
+	/* Current struct iscsi_pdu used for DataPDUInOrder=No */
+	struct iscsi_pdu	*pdu_ptr;
+	/* Array of struct iscsi_seq used for DataSequenceInOrder=No */
+	struct iscsi_seq	*seq_list;
+	/* Current struct iscsi_seq used for DataSequenceInOrder=No */
+	struct iscsi_seq	*seq_ptr;
+	/* TMR Request when iscsi_opcode == ISCSI_OP_SCSI_TMFUNC */
+	struct iscsi_tmr_req	*tmr_req;
+	/* Connection this command is allegiant to */
+	struct iscsi_conn	*conn;
+	/* Pointer to connection recovery entry */
+	struct iscsi_conn_recovery *cr;
+	/* Session the command is part of, used for connection recovery */
+	struct iscsi_session	*sess;
+	/* list_head for connection list */
+	struct list_head	i_list;
+	/* The TCM I/O descriptor that is accessed via container_of() */
+	struct se_cmd		se_cmd;
+	/* Sense buffer that will be mapped into outgoing status */
+#define ISCSI_SENSE_BUFFER_LEN          (TRANSPORT_SENSE_BUFFER + 2)
+	unsigned char		sense_buffer[ISCSI_SENSE_BUFFER_LEN];
+
+	struct scatterlist	*t_mem_sg;
+	u32			t_mem_sg_nents;
+
+	u32			padding;
+	u8			pad_bytes[4];
+
+	struct scatterlist	*first_data_sg;
+	u32			first_data_sg_off;
+	u32			kmapped_nents;
+
+}  ____cacheline_aligned;
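
The struct se_cmd embedded above is the handle that TCM callbacks receive; as the comment on se_cmd notes, the enclosing iSCSI descriptor is recovered with container_of(). A minimal sketch of the idiom (the helper name is illustrative; container_of() itself is the real kernel macro):

	#include <linux/kernel.h>

	/* Given the embedded member, recover the enclosing struct iscsi_cmd. */
	static inline struct iscsi_cmd *iscsi_cmd_from_se_cmd(struct se_cmd *se_cmd)
	{
		return container_of(se_cmd, struct iscsi_cmd, se_cmd);
	}
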
+
+struct iscsi_tmr_req {
+	bool			task_reassign:1;
+	u32			ref_cmd_sn;
+	u32			exp_data_sn;
+	struct iscsi_conn_recovery *conn_recovery;
+	struct se_tmr_req	*se_tmr_req;
+};
+
+struct iscsi_conn {
+	/* Authentication Successful for this connection */
+	u8			auth_complete;
+	/* State connection is currently in */
+	u8			conn_state;
+	u8			conn_logout_reason;
+	u8			network_transport;
+	enum iscsi_timer_flags_table nopin_timer_flags;
+	enum iscsi_timer_flags_table nopin_response_timer_flags;
+	u8			tx_immediate_queue;
+	u8			tx_response_queue;
+	/* Used to know what thread encountered a transport failure */
+	u8			which_thread;
+	/* connection id assigned by the Initiator */
+	u16			cid;
+	/* Remote TCP Port */
+	u16			login_port;
+	int			net_size;
+	u32			auth_id;
+#define CONNFLAG_SCTP_STRUCT_FILE			0x01
+	u32			conn_flags;
+	/* Used for iscsi_tx_login_rsp() */
+	u32			login_itt;
+	u32			exp_statsn;
+	/* Per connection status sequence number */
+	u32			stat_sn;
+	/* IFMarkInt's Current Value */
+	u32			if_marker;
+	/* OFMarkInt's Current Value */
+	u32			of_marker;
+	/* Used for calculating OFMarker offset to next PDU */
+	u32			of_marker_offset;
+	/* Complete Bad PDU for sending reject */
+	unsigned char		bad_hdr[ISCSI_HDR_LEN];
+#define IPV6_ADDRESS_SPACE				48
+	unsigned char		login_ip[IPV6_ADDRESS_SPACE];
+	int			conn_usage_count;
+	int			conn_waiting_on_uc;
+	atomic_t		check_immediate_queue;
+	atomic_t		conn_logout_remove;
+	atomic_t		connection_exit;
+	atomic_t		connection_recovery;
+	atomic_t		connection_reinstatement;
+	atomic_t		connection_wait;
+	atomic_t		connection_wait_rcfr;
+	atomic_t		sleep_on_conn_wait_comp;
+	atomic_t		transport_failed;
+	struct completion	conn_post_wait_comp;
+	struct completion	conn_wait_comp;
+	struct completion	conn_wait_rcfr_comp;
+	struct completion	conn_waiting_on_uc_comp;
+	struct completion	conn_logout_comp;
+	struct completion	tx_half_close_comp;
+	struct completion	rx_half_close_comp;
+	/* socket used by this connection */
+	struct socket		*sock;
+	struct timer_list	nopin_timer;
+	struct timer_list	nopin_response_timer;
+	struct timer_list	transport_timer;
+	/* Spinlock used for add/deleting cmd's from conn_cmd_list */
+	spinlock_t		cmd_lock;
+	spinlock_t		conn_usage_lock;
+	spinlock_t		immed_queue_lock;
+	spinlock_t		nopin_timer_lock;
+	spinlock_t		response_queue_lock;
+	spinlock_t		state_lock;
+	/* libcrypto RX and TX contexts for crc32c */
+	struct hash_desc	conn_rx_hash;
+	struct hash_desc	conn_tx_hash;
+	/* Used for scheduling TX and RX connection kthreads */
+	cpumask_var_t		conn_cpumask;
+	unsigned int		conn_rx_reset_cpumask:1;
+	unsigned int		conn_tx_reset_cpumask:1;
+	/* list_head of struct iscsi_cmd for this connection */
+	struct list_head	conn_cmd_list;
+	struct list_head	immed_queue_list;
+	struct list_head	response_queue_list;
+	struct iscsi_conn_ops	*conn_ops;
+	struct iscsi_param_list	*param_list;
+	/* Used for per connection auth state machine */
+	void			*auth_protocol;
+	struct iscsi_login_thread_s *login_thread;
+	struct iscsi_portal_group *tpg;
+	/* Pointer to parent session */
+	struct iscsi_session	*sess;
+	/* Pointer to thread_set in use for this conn's threads */
+	struct iscsi_thread_set	*thread_set;
+	/* list_head for session connection list */
+	struct list_head	conn_list;
+} ____cacheline_aligned;
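
conn_rx_hash and conn_tx_hash use the synchronous hash API of this kernel era, where the transform would be allocated once (e.g. with crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC)) and each PDU digest is an init/update/final cycle over a scatterlist. A sketch of one such cycle, with error handling elided and the helper name illustrative:

	#include <linux/crypto.h>
	#include <linux/scatterlist.h>

	/* Sketch: one crc32c digest over a scatterlist with the legacy
	 * hash_desc API; real code must check each return value. */
	static u32 crc32c_over_sg(struct hash_desc *desc,
				  struct scatterlist *sg, unsigned int nbytes)
	{
		u32 crc;

		crypto_hash_init(desc);
		crypto_hash_update(desc, sg, nbytes);
		crypto_hash_final(desc, (u8 *)&crc);
		return crc;
	}
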
+
+struct iscsi_conn_recovery {
+	u16			cid;
+	u32			cmd_count;
+	u32			maxrecvdatasegmentlength;
+	int			ready_for_reallegiance;
+	struct list_head	conn_recovery_cmd_list;
+	spinlock_t		conn_recovery_cmd_lock;
+	struct timer_list	time2retain_timer;
+	struct iscsi_session	*sess;
+	struct list_head	cr_list;
+}  ____cacheline_aligned;
+
+struct iscsi_session {
+	u8			initiator_vendor;
+	u8			isid[6];
+	enum iscsi_timer_flags_table time2retain_timer_flags;
+	u8			version_active;
+	u16			cid_called;
+	u16			conn_recovery_count;
+	u16			tsih;
+	/* state session is currently in */
+	u32			session_state;
+	/* session wide counter: initiator assigned task tag */
+	u32			init_task_tag;
+	/* session wide counter: target assigned task tag */
+	u32			targ_xfer_tag;
+	u32			cmdsn_window;
+
+	/* protects cmdsn values */
+	struct mutex		cmdsn_mutex;
+	/* session wide counter: expected command sequence number */
+	u32			exp_cmd_sn;
+	/* session wide counter: maximum allowed command sequence number */
+	u32			max_cmd_sn;
+	struct list_head	sess_ooo_cmdsn_list;
+
+	/* LIO specific session ID */
+	u32			sid;
+	char			auth_type[8];
+	/* unique within the target */
+	int			session_index;
+	/* Used for session reference counting */
+	int			session_usage_count;
+	int			session_waiting_on_uc;
+	u32			cmd_pdus;
+	u32			rsp_pdus;
+	u64			tx_data_octets;
+	u64			rx_data_octets;
+	u32			conn_digest_errors;
+	u32			conn_timeout_errors;
+	u64			creation_time;
+	spinlock_t		session_stats_lock;
+	/* Number of active connections */
+	atomic_t		nconn;
+	atomic_t		session_continuation;
+	atomic_t		session_fall_back_to_erl0;
+	atomic_t		session_logout;
+	atomic_t		session_reinstatement;
+	atomic_t		session_stop_active;
+	atomic_t		sleep_on_sess_wait_comp;
+	atomic_t		transport_wait_cmds;
+	/* connection list */
+	struct list_head	sess_conn_list;
+	struct list_head	cr_active_list;
+	struct list_head	cr_inactive_list;
+	spinlock_t		conn_lock;
+	spinlock_t		cr_a_lock;
+	spinlock_t		cr_i_lock;
+	spinlock_t		session_usage_lock;
+	spinlock_t		ttt_lock;
+	struct completion	async_msg_comp;
+	struct completion	reinstatement_comp;
+	struct completion	session_wait_comp;
+	struct completion	session_waiting_on_uc_comp;
+	struct timer_list	time2retain_timer;
+	struct iscsi_sess_ops	*sess_ops;
+	struct se_session	*se_sess;
+	struct iscsi_portal_group *tpg;
+} ____cacheline_aligned;
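
exp_cmd_sn and max_cmd_sn bound a sliding window in 32-bit serial-number arithmetic, so an in-window test must tolerate wraparound rather than compare CmdSNs directly. A standalone sketch of such a check (illustrative names, not the driver's function):

	#include <stdio.h>

	/* Serial-number compare (RFC 1982 style): true when a precedes b
	 * modulo 2**32. Illustrative sketch only. */
	static int sna_lt(unsigned int a, unsigned int b)
	{
		return (int)(a - b) < 0;
	}

	static int cmdsn_in_window(unsigned int cmdsn, unsigned int exp_sn,
				   unsigned int max_sn)
	{
		return !sna_lt(cmdsn, exp_sn) && !sna_lt(max_sn, cmdsn);
	}

	int main(void)
	{
		/* a window straddling the 32-bit wrap still accepts 0x2 */
		printf("%d\n", cmdsn_in_window(0x2, 0xfffffffe, 0x10));
		return 0;
	}
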
+
+struct iscsi_login {
+	u8 auth_complete;
+	u8 checked_for_existing;
+	u8 current_stage;
+	u8 leading_connection;
+	u8 first_request;
+	u8 version_min;
+	u8 version_max;
+	char isid[6];
+	u32 cmd_sn;
+	u32 init_task_tag;
+	u32 initial_exp_statsn;
+	u32 rsp_length;
+	u16 cid;
+	u16 tsih;
+	char *req;
+	char *rsp;
+	char *req_buf;
+	char *rsp_buf;
+} ____cacheline_aligned;
+
+struct iscsi_node_attrib {
+	u32			dataout_timeout;
+	u32			dataout_timeout_retries;
+	u32			default_erl;
+	u32			nopin_timeout;
+	u32			nopin_response_timeout;
+	u32			random_datain_pdu_offsets;
+	u32			random_datain_seq_offsets;
+	u32			random_r2t_offsets;
+	u32			tmr_cold_reset;
+	u32			tmr_warm_reset;
+	struct iscsi_node_acl *nacl;
+};
+
+struct se_dev_entry_s;
+
+struct iscsi_node_auth {
+	enum naf_flags_table	naf_flags;
+	int			authenticate_target;
+	/* Used for iscsit_global->discovery_auth,
+	 * set to zero (auth disabled) by default */
+	int			enforce_discovery_auth;
+#define MAX_USER_LEN				256
+#define MAX_PASS_LEN				256
+	char			userid[MAX_USER_LEN];
+	char			password[MAX_PASS_LEN];
+	char			userid_mutual[MAX_USER_LEN];
+	char			password_mutual[MAX_PASS_LEN];
+};
+
+#include "iscsi_target_stat.h"
+
+struct iscsi_node_stat_grps {
+	struct config_group	iscsi_sess_stats_group;
+	struct config_group	iscsi_conn_stats_group;
+};
+
+struct iscsi_node_acl {
+	struct iscsi_node_attrib node_attrib;
+	struct iscsi_node_auth	node_auth;
+	struct iscsi_node_stat_grps node_stat_grps;
+	struct se_node_acl	se_node_acl;
+};
+
+#define NODE_STAT_GRPS(nacl)	(&(nacl)->node_stat_grps)
+
+#define ISCSI_NODE_ATTRIB(t)	(&(t)->node_attrib)
+#define ISCSI_NODE_AUTH(t)	(&(t)->node_auth)
+
+struct iscsi_tpg_attrib {
+	u32			authentication;
+	u32			login_timeout;
+	u32			netif_timeout;
+	u32			generate_node_acls;
+	u32			cache_dynamic_acls;
+	u32			default_cmdsn_depth;
+	u32			demo_mode_write_protect;
+	u32			prod_mode_write_protect;
+	struct iscsi_portal_group *tpg;
+};
+
+struct iscsi_np {
+	int			np_network_transport;
+	int			np_ip_proto;
+	int			np_sock_type;
+	enum np_thread_state_table np_thread_state;
+	enum iscsi_timer_flags_table np_login_timer_flags;
+	u32			np_exports;
+	enum np_flags_table	np_flags;
+	unsigned char		np_ip[IPV6_ADDRESS_SPACE];
+	u16			np_port;
+	spinlock_t		np_thread_lock;
+	struct completion	np_restart_comp;
+	struct socket		*np_socket;
+	struct __kernel_sockaddr_storage np_sockaddr;
+	struct task_struct	*np_thread;
+	struct timer_list	np_login_timer;
+	struct iscsi_portal_group *np_login_tpg;
+	struct list_head	np_list;
+} ____cacheline_aligned;
+
+struct iscsi_tpg_np {
+	struct iscsi_np		*tpg_np;
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tpg_np	*tpg_np_parent;
+	struct list_head	tpg_np_list;
+	struct list_head	tpg_np_child_list;
+	struct list_head	tpg_np_parent_list;
+	struct se_tpg_np	se_tpg_np;
+	spinlock_t		tpg_np_parent_lock;
+};
+
+struct iscsi_portal_group {
+	unsigned char		tpg_chap_id;
+	/* TPG State */
+	enum tpg_state_table	tpg_state;
+	/* Target Portal Group Tag */
+	u16			tpgt;
+	/* Id assigned to target sessions */
+	u16			ntsih;
+	/* Number of active sessions */
+	u32			nsessions;
+	/* Number of Network Portals available for this TPG */
+	u32			num_tpg_nps;
+	/* Per TPG LIO specific session ID. */
+	u32			sid;
+	/* Spinlock for adding/removing Network Portals */
+	spinlock_t		tpg_np_lock;
+	spinlock_t		tpg_state_lock;
+	struct se_portal_group tpg_se_tpg;
+	struct mutex		tpg_access_lock;
+	struct mutex		np_login_lock;
+	struct iscsi_tpg_attrib	tpg_attrib;
+	/* Pointer to default list of iSCSI parameters for TPG */
+	struct iscsi_param_list	*param_list;
+	struct iscsi_tiqn	*tpg_tiqn;
+	struct list_head	tpg_gnp_list;
+	struct list_head	tpg_list;
+} ____cacheline_aligned;
+
+#define ISCSI_TPG_C(c)		((struct iscsi_portal_group *)(c)->tpg)
+#define ISCSI_TPG_LUN(c, l)  ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l])
+#define ISCSI_TPG_S(s)		((struct iscsi_portal_group *)(s)->tpg)
+#define ISCSI_TPG_ATTRIB(t)	(&(t)->tpg_attrib)
+#define SE_TPG(tpg)		(&(tpg)->tpg_se_tpg)
+
+struct iscsi_wwn_stat_grps {
+	struct config_group	iscsi_stat_group;
+	struct config_group	iscsi_instance_group;
+	struct config_group	iscsi_sess_err_group;
+	struct config_group	iscsi_tgt_attr_group;
+	struct config_group	iscsi_login_stats_group;
+	struct config_group	iscsi_logout_stats_group;
+};
+
+struct iscsi_tiqn {
+#define ISCSI_IQN_LEN				224
+	unsigned char		tiqn[ISCSI_IQN_LEN];
+	enum tiqn_state_table	tiqn_state;
+	int			tiqn_access_count;
+	u32			tiqn_active_tpgs;
+	u32			tiqn_ntpgs;
+	u32			tiqn_num_tpg_nps;
+	u32			tiqn_nsessions;
+	struct list_head	tiqn_list;
+	struct list_head	tiqn_tpg_list;
+	spinlock_t		tiqn_state_lock;
+	spinlock_t		tiqn_tpg_lock;
+	struct se_wwn		tiqn_wwn;
+	struct iscsi_wwn_stat_grps tiqn_stat_grps;
+	int			tiqn_index;
+	struct iscsi_sess_err_stats  sess_err_stats;
+	struct iscsi_login_stats     login_stats;
+	struct iscsi_logout_stats    logout_stats;
+} ____cacheline_aligned;
+
+#define WWN_STAT_GRPS(tiqn)	(&(tiqn)->tiqn_stat_grps)
+
+struct iscsit_global {
+	/* In core shutdown */
+	u32			in_shutdown;
+	u32			active_ts;
+	/* Unique identifier used for the authentication daemon */
+	u32			auth_id;
+	u32			inactive_ts;
+	/* Thread Set bitmap count */
+	int			ts_bitmap_count;
+	/* Thread Set bitmap pointer */
+	unsigned long		*ts_bitmap;
+	/* Used for iSCSI discovery session authentication */
+	struct iscsi_node_acl	discovery_acl;
+	struct iscsi_portal_group	*discovery_tpg;
+};
+
+#endif /* ISCSI_TARGET_CORE_H */
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c
new file mode 100644
index 0000000..8c04951
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_datain_values.c
@@ -0,0 +1,531 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target DataIN value generation functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_datain_values.h"
+
+struct iscsi_datain_req *iscsit_allocate_datain_req(void)
+{
+	struct iscsi_datain_req *dr;
+
+	dr = kmem_cache_zalloc(lio_dr_cache, GFP_ATOMIC);
+	if (!dr) {
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_datain_req\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&dr->dr_list);
+
+	return dr;
+}
+
+void iscsit_attach_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+	spin_lock(&cmd->datain_lock);
+	list_add_tail(&dr->dr_list, &cmd->datain_list);
+	spin_unlock(&cmd->datain_lock);
+}
+
+void iscsit_free_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+	spin_lock(&cmd->datain_lock);
+	list_del(&dr->dr_list);
+	spin_unlock(&cmd->datain_lock);
+
+	kmem_cache_free(lio_dr_cache, dr);
+}
+
+void iscsit_free_all_datain_reqs(struct iscsi_cmd *cmd)
+{
+	struct iscsi_datain_req *dr, *dr_tmp;
+
+	spin_lock(&cmd->datain_lock);
+	list_for_each_entry_safe(dr, dr_tmp, &cmd->datain_list, dr_list) {
+		list_del(&dr->dr_list);
+		kmem_cache_free(lio_dr_cache, dr);
+	}
+	spin_unlock(&cmd->datain_lock);
+}
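
The _safe iterator matters here: each entry is unlinked and freed inside the loop body, so plain list_for_each_entry() would chase a next pointer through freed memory. The same pattern, distilled (assuming <linux/list.h> and <linux/slab.h>; type names illustrative):

	/* Sketch: tmp caches the next node before the body runs, so
	 * list_del() + kfree() on the current entry cannot break traversal. */
	struct item {
		struct list_head node;
	};

	static void free_all(struct list_head *head)
	{
		struct item *it, *tmp;

		list_for_each_entry_safe(it, tmp, head, node) {
			list_del(&it->node);
			kfree(it);
		}
	}
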
+
+struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *cmd)
+{
+	struct iscsi_datain_req *dr;
+
+	if (list_empty(&cmd->datain_list)) {
+		pr_err("cmd->datain_list is empty for ITT:"
+			" 0x%08x\n", cmd->init_task_tag);
+		return NULL;
+	}
+	dr = list_first_entry(&cmd->datain_list, struct iscsi_datain_req,
+			dr_list);
+
+	return dr;
+}
+
+/*
+ *	For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=Yes.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_yes(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain *datain)
+{
+	u32 next_burst_len, read_data_done, read_data_left;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_datain_req *dr;
+
+	dr = iscsit_get_datain_req(cmd);
+	if (!dr)
+		return NULL;
+
+	if (dr->recovery && dr->generate_recovery_values) {
+		if (iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+					cmd, dr) < 0)
+			return NULL;
+
+		dr->generate_recovery_values = 0;
+	}
+
+	next_burst_len = (!dr->recovery) ?
+			cmd->next_burst_len : dr->next_burst_len;
+	read_data_done = (!dr->recovery) ?
+			cmd->read_data_done : dr->read_data_done;
+
+	read_data_left = (cmd->data_length - read_data_done);
+	if (!read_data_left) {
+		pr_err("ITT: 0x%08x read_data_left is zero!\n",
+				cmd->init_task_tag);
+		return NULL;
+	}
+
+	if ((read_data_left <= conn->conn_ops->MaxRecvDataSegmentLength) &&
+	    (read_data_left <= (conn->sess->sess_ops->MaxBurstLength -
+	     next_burst_len))) {
+		datain->length = read_data_left;
+
+		datain->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS);
+		if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+			datain->flags |= ISCSI_FLAG_DATA_ACK;
+	} else {
+		if ((next_burst_len +
+		     conn->conn_ops->MaxRecvDataSegmentLength) <
+		     conn->sess->sess_ops->MaxBurstLength) {
+			datain->length =
+				conn->conn_ops->MaxRecvDataSegmentLength;
+			next_burst_len += datain->length;
+		} else {
+			datain->length = (conn->sess->sess_ops->MaxBurstLength -
+					  next_burst_len);
+			next_burst_len = 0;
+
+			datain->flags |= ISCSI_FLAG_CMD_FINAL;
+			if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+				datain->flags |= ISCSI_FLAG_DATA_ACK;
+		}
+	}
+
+	datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+	datain->offset = read_data_done;
+
+	if (!dr->recovery) {
+		cmd->next_burst_len = next_burst_len;
+		cmd->read_data_done += datain->length;
+	} else {
+		dr->next_burst_len = next_burst_len;
+		dr->read_data_done += datain->length;
+	}
+
+	if (!dr->recovery) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+			dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+		return dr;
+	}
+
+	if (!dr->runlength) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	} else {
+		if ((dr->begrun + dr->runlength) == dr->data_sn) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	}
+
+	return dr;
+}
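
Stripped of the recovery bookkeeping, the case split above is min() arithmetic: each DataIN payload is the smaller of the bytes left, MaxRecvDataSegmentLength, and the space left in the current burst, with the F bit set at burst boundaries and S on the final PDU. A user-space walk-through with hypothetical key values:

	#include <stdio.h>

	int main(void)
	{
		unsigned int data_length = 262144;	/* total READ */
		unsigned int mrdsl = 65536;	/* MaxRecvDataSegmentLength */
		unsigned int mbl = 131072;	/* MaxBurstLength */
		unsigned int done = 0, burst = 0, datasn = 0;

		while (done < data_length) {
			unsigned int len = data_length - done;
			int f = 0, s = 0;

			if (len > mrdsl)
				len = mrdsl;
			if (len > mbl - burst)
				len = mbl - burst;
			burst += len;
			if (burst == mbl || done + len == data_length) {
				f = 1;	/* ISCSI_FLAG_CMD_FINAL */
				burst = 0;
			}
			if (done + len == data_length)
				s = 1;	/* ISCSI_FLAG_DATA_STATUS */
			printf("DataSN: %u Offset: %u Length: %u F: %d S: %d\n",
				datasn++, done, len, f, s);
			done += len;
		}
		return 0;
	}
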
+
+/*
+ *	For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=Yes.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_no_and_yes(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain *datain)
+{
+	u32 offset, read_data_done, read_data_left, seq_send_order;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_datain_req *dr;
+	struct iscsi_seq *seq;
+
+	dr = iscsit_get_datain_req(cmd);
+	if (!dr)
+		return NULL;
+
+	if (dr->recovery && dr->generate_recovery_values) {
+		if (iscsit_create_recovery_datain_values_datasequenceinorder_no(
+					cmd, dr) < 0)
+			return NULL;
+
+		dr->generate_recovery_values = 0;
+	}
+
+	read_data_done = (!dr->recovery) ?
+			cmd->read_data_done : dr->read_data_done;
+	seq_send_order = (!dr->recovery) ?
+			cmd->seq_send_order : dr->seq_send_order;
+
+	read_data_left = (cmd->data_length - read_data_done);
+	if (!read_data_left) {
+		pr_err("ITT: 0x%08x read_data_left is zero!\n",
+				cmd->init_task_tag);
+		return NULL;
+	}
+
+	seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order);
+	if (!seq)
+		return NULL;
+
+	seq->sent = 1;
+
+	if (!dr->recovery && !seq->next_burst_len)
+		seq->first_datasn = cmd->data_sn;
+
+	offset = (seq->offset + seq->next_burst_len);
+
+	if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+	     cmd->data_length) {
+		datain->length = (cmd->data_length - offset);
+		datain->offset = offset;
+
+		datain->flags |= ISCSI_FLAG_CMD_FINAL;
+		if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+			datain->flags |= ISCSI_FLAG_DATA_ACK;
+
+		seq->next_burst_len = 0;
+		seq_send_order++;
+	} else {
+		if ((seq->next_burst_len +
+		     conn->conn_ops->MaxRecvDataSegmentLength) <
+		     conn->sess->sess_ops->MaxBurstLength) {
+			datain->length =
+				conn->conn_ops->MaxRecvDataSegmentLength;
+			datain->offset = (seq->offset + seq->next_burst_len);
+
+			seq->next_burst_len += datain->length;
+		} else {
+			datain->length = (conn->sess->sess_ops->MaxBurstLength -
+					  seq->next_burst_len);
+			datain->offset = (seq->offset + seq->next_burst_len);
+
+			datain->flags |= ISCSI_FLAG_CMD_FINAL;
+			if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+				datain->flags |= ISCSI_FLAG_DATA_ACK;
+
+			seq->next_burst_len = 0;
+			seq_send_order++;
+		}
+	}
+
+	if ((read_data_done + datain->length) == cmd->data_length)
+		datain->flags |= ISCSI_FLAG_DATA_STATUS;
+
+	datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+	if (!dr->recovery) {
+		cmd->seq_send_order = seq_send_order;
+		cmd->read_data_done += datain->length;
+	} else {
+		dr->seq_send_order = seq_send_order;
+		dr->read_data_done += datain->length;
+	}
+
+	if (!dr->recovery) {
+		if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+			seq->last_datasn = datain->data_sn;
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+			dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+		return dr;
+	}
+
+	if (!dr->runlength) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	} else {
+		if ((dr->begrun + dr->runlength) == dr->data_sn) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	}
+
+	return dr;
+}
+
+/*
+ *	For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=No.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_no(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain *datain)
+{
+	u32 next_burst_len, read_data_done, read_data_left;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_datain_req *dr;
+	struct iscsi_pdu *pdu;
+
+	dr = iscsit_get_datain_req(cmd);
+	if (!dr)
+		return NULL;
+
+	if (dr->recovery && dr->generate_recovery_values) {
+		if (iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+					cmd, dr) < 0)
+			return NULL;
+
+		dr->generate_recovery_values = 0;
+	}
+
+	next_burst_len = (!dr->recovery) ?
+			cmd->next_burst_len : dr->next_burst_len;
+	read_data_done = (!dr->recovery) ?
+			cmd->read_data_done : dr->read_data_done;
+
+	read_data_left = (cmd->data_length - read_data_done);
+	if (!read_data_left) {
+		pr_err("ITT: 0x%08x read_data_left is zero!\n",
+				cmd->init_task_tag);
+		return dr;
+	}
+
+	pdu = iscsit_get_pdu_holder_for_seq(cmd, NULL);
+	if (!pdu)
+		return dr;
+
+	if ((read_data_done + pdu->length) == cmd->data_length) {
+		pdu->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS);
+		if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+			pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+		next_burst_len = 0;
+	} else {
+		if ((next_burst_len + conn->conn_ops->MaxRecvDataSegmentLength) <
+		     conn->sess->sess_ops->MaxBurstLength)
+			next_burst_len += pdu->length;
+		else {
+			pdu->flags |= ISCSI_FLAG_CMD_FINAL;
+			if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+				pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+			next_burst_len = 0;
+		}
+	}
+
+	pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+	if (!dr->recovery) {
+		cmd->next_burst_len = next_burst_len;
+		cmd->read_data_done += pdu->length;
+	} else {
+		dr->next_burst_len = next_burst_len;
+		dr->read_data_done += pdu->length;
+	}
+
+	datain->flags = pdu->flags;
+	datain->length = pdu->length;
+	datain->offset = pdu->offset;
+	datain->data_sn = pdu->data_sn;
+
+	if (!dr->recovery) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+			dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+		return dr;
+	}
+
+	if (!dr->runlength) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	} else {
+		if ((dr->begrun + dr->runlength) == dr->data_sn) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	}
+
+	return dr;
+}
+
+/*
+ *	For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=No.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_no_and_no(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain *datain)
+{
+	u32 read_data_done, read_data_left, seq_send_order;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_datain_req *dr;
+	struct iscsi_pdu *pdu;
+	struct iscsi_seq *seq = NULL;
+
+	dr = iscsit_get_datain_req(cmd);
+	if (!dr)
+		return NULL;
+
+	if (dr->recovery && dr->generate_recovery_values) {
+		if (iscsit_create_recovery_datain_values_datasequenceinorder_no(
+					cmd, dr) < 0)
+			return NULL;
+
+		dr->generate_recovery_values = 0;
+	}
+
+	read_data_done = (!dr->recovery) ?
+			cmd->read_data_done : dr->read_data_done;
+	seq_send_order = (!dr->recovery) ?
+			cmd->seq_send_order : dr->seq_send_order;
+
+	read_data_left = (cmd->data_length - read_data_done);
+	if (!read_data_left) {
+		pr_err("ITT: 0x%08x read_data_left is zero!\n",
+				cmd->init_task_tag);
+		return NULL;
+	}
+
+	seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order);
+	if (!seq)
+		return NULL;
+
+	seq->sent = 1;
+
+	if (!dr->recovery && !seq->next_burst_len)
+		seq->first_datasn = cmd->data_sn;
+
+	pdu = iscsit_get_pdu_holder_for_seq(cmd, seq);
+	if (!pdu)
+		return NULL;
+
+	if (seq->pdu_send_order == seq->pdu_count) {
+		pdu->flags |= ISCSI_FLAG_CMD_FINAL;
+		if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+			pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+		seq->next_burst_len = 0;
+		seq_send_order++;
+	} else
+		seq->next_burst_len += pdu->length;
+
+	if ((read_data_done + pdu->length) == cmd->data_length)
+		pdu->flags |= ISCSI_FLAG_DATA_STATUS;
+
+	pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+	if (!dr->recovery) {
+		cmd->seq_send_order = seq_send_order;
+		cmd->read_data_done += pdu->length;
+	} else {
+		dr->seq_send_order = seq_send_order;
+		dr->read_data_done += pdu->length;
+	}
+
+	datain->flags = pdu->flags;
+	datain->length = pdu->length;
+	datain->offset = pdu->offset;
+	datain->data_sn = pdu->data_sn;
+
+	if (!dr->recovery) {
+		if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+			seq->last_datasn = datain->data_sn;
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+			dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+		return dr;
+	}
+
+	if (!dr->runlength) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	} else {
+		if ((dr->begrun + dr->runlength) == dr->data_sn) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	}
+
+	return dr;
+}
+
+struct iscsi_datain_req *iscsit_get_datain_values(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain *datain)
+{
+	struct iscsi_conn *conn = cmd->conn;
+
+	if (conn->sess->sess_ops->DataSequenceInOrder &&
+	    conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_yes_and_yes(cmd, datain);
+	else if (!conn->sess->sess_ops->DataSequenceInOrder &&
+		  conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_no_and_yes(cmd, datain);
+	else if (conn->sess->sess_ops->DataSequenceInOrder &&
+		 !conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_yes_and_no(cmd, datain);
+	else if (!conn->sess->sess_ops->DataSequenceInOrder &&
+		   !conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_no_and_no(cmd, datain);
+
+	return NULL;
+}
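
The chain above is a 2x2 dispatch on the DataSequenceInOrder and DataPDUInOrder keys. The same selection expressed as a lookup table over the four static functions defined earlier in this file (a behaviorally equivalent sketch, not part of the patch):

	typedef struct iscsi_datain_req *(*datain_fn)(struct iscsi_cmd *,
						      struct iscsi_datain *);

	/* [DataSequenceInOrder][DataPDUInOrder] */
	static const datain_fn datain_tab[2][2] = {
		[0][0] = iscsit_set_datain_values_no_and_no,
		[0][1] = iscsit_set_datain_values_no_and_yes,
		[1][0] = iscsit_set_datain_values_yes_and_no,
		[1][1] = iscsit_set_datain_values_yes_and_yes,
	};

	/* usage: datain_tab[!!sops->DataSequenceInOrder]
	 *		    [!!sops->DataPDUInOrder](cmd, datain); */
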
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.h b/drivers/target/iscsi/iscsi_target_datain_values.h
new file mode 100644
index 0000000..646429a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_datain_values.h
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_DATAIN_VALUES_H
+#define ISCSI_TARGET_DATAIN_VALUES_H
+
+extern struct iscsi_datain_req *iscsit_allocate_datain_req(void);
+extern void iscsit_attach_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
+extern void iscsit_free_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
+extern void iscsit_free_all_datain_reqs(struct iscsi_cmd *);
+extern struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *);
+extern struct iscsi_datain_req *iscsit_get_datain_values(struct iscsi_cmd *,
+			struct iscsi_datain *);
+
+#endif   /*** ISCSI_TARGET_DATAIN_VALUES_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c
new file mode 100644
index 0000000..a19fa5e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_device.c
@@ -0,0 +1,87 @@
+/*******************************************************************************
+ * This file contains the iSCSI Virtual Device and Disk Transport
+ * agnostic related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/scsi_device.h>
+#include <target/target_core_base.h>
+#include <target/target_core_device.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+
+int iscsit_get_lun_for_tmr(
+	struct iscsi_cmd *cmd,
+	u64 lun)
+{
+	u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+
+	return transport_lookup_tmr_lun(&cmd->se_cmd, unpacked_lun);
+}
+
+int iscsit_get_lun_for_cmd(
+	struct iscsi_cmd *cmd,
+	unsigned char *cdb,
+	u64 lun)
+{
+	u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+
+	return transport_lookup_cmd_lun(&cmd->se_cmd, unpacked_lun);
+}
+
+void iscsit_determine_maxcmdsn(struct iscsi_session *sess)
+{
+	struct se_node_acl *se_nacl;
+
+	/*
+	 * This is a discovery session, the single queue slot was already
+	 * assigned in iscsi_login_zero_tsih().  Since only Logout and
+	 * Text Opcodes are allowed during discovery we do not have to worry
+	 * about the HBA's queue depth here.
+	 */
+	if (sess->sess_ops->SessionType)
+		return;
+
+	se_nacl = sess->se_sess->se_node_acl;
+
+	/*
+	 * This is a normal session, set the Session's CmdSN window to the
+	 * struct se_node_acl->queue_depth.  The value in struct se_node_acl->queue_depth
+	 * has already been validated as a legal value in
+	 * core_set_queue_depth_for_node().
+	 */
+	sess->cmdsn_window = se_nacl->queue_depth;
+	sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1;
+}
+
+void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess)
+{
+	if (cmd->immediate_cmd || cmd->maxcmdsn_inc)
+		return;
+
+	cmd->maxcmdsn_inc = 1;
+
+	mutex_lock(&sess->cmdsn_mutex);
+	sess->max_cmd_sn += 1;
+	pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn);
+	mutex_unlock(&sess->cmdsn_mutex);
+}
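
Taken together, the two functions above keep MaxCmdSN exactly one queue-depth window ahead of the initiator: login opens the window at ExpCmdSN + queue_depth - 1, and each completed non-immediate command slides the top edge up by one. A worked sketch, assuming the session's MaxCmdSN starts equal to ExpCmdSN and a hypothetical depth of 32:

	#include <stdio.h>

	int main(void)
	{
		unsigned int exp_cmd_sn = 1, max_cmd_sn = 1;
		unsigned int queue_depth = 32;	/* hypothetical ACL depth */

		/* iscsit_determine_maxcmdsn() at login */
		max_cmd_sn = (max_cmd_sn + queue_depth) - 1;
		printf("window: [%u..%u]\n", exp_cmd_sn, max_cmd_sn);	/* [1..32] */

		/* iscsit_increment_maxcmdsn() per completed command */
		max_cmd_sn += 1;
		printf("window top now %u\n", max_cmd_sn);	/* 33 */
		return 0;
	}
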
diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h
new file mode 100644
index 0000000..bef1cad
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_device.h
@@ -0,0 +1,9 @@
+#ifndef ISCSI_TARGET_DEVICE_H
+#define ISCSI_TARGET_DEVICE_H
+
+extern int iscsit_get_lun_for_tmr(struct iscsi_cmd *, u64);
+extern int iscsit_get_lun_for_cmd(struct iscsi_cmd *, unsigned char *, u64);
+extern void iscsit_determine_maxcmdsn(struct iscsi_session *);
+extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *);
+
+#endif /* ISCSI_TARGET_DEVICE_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
new file mode 100644
index 0000000..b7ffc3c
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -0,0 +1,1004 @@
+/******************************************************************************
+ * This file contains error recovery level zero functions used by
+ * the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ *	Used to set values in struct iscsi_cmd that iscsit_dataout_check_sequence()
+ *	checks against to determine a PDU's Offset+Length is within the current
+ *	DataOUT Sequence.  Used for DataSequenceInOrder=Yes only.
+ */
+void iscsit_set_dataout_sequence_values(
+	struct iscsi_cmd *cmd)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	/*
+	 * Still set seq_start_offset and seq_end_offset for Unsolicited
+	 * DataOUT, even if DataSequenceInOrder=No.
+	 */
+	if (cmd->unsolicited_data) {
+		cmd->seq_start_offset = cmd->write_data_done;
+		cmd->seq_end_offset = cmd->write_data_done +
+			((cmd->data_length >
+			  conn->sess->sess_ops->FirstBurstLength) ?
+			 conn->sess->sess_ops->FirstBurstLength :
+			 cmd->data_length);
+		return;
+	}
+
+	if (!conn->sess->sess_ops->DataSequenceInOrder)
+		return;
+
+	if (!cmd->seq_start_offset && !cmd->seq_end_offset) {
+		cmd->seq_start_offset = cmd->write_data_done;
+		cmd->seq_end_offset = (cmd->data_length >
+			conn->sess->sess_ops->MaxBurstLength) ?
+			(cmd->write_data_done +
+			conn->sess->sess_ops->MaxBurstLength) : cmd->data_length;
+	} else {
+		cmd->seq_start_offset = cmd->seq_end_offset;
+		cmd->seq_end_offset = ((cmd->seq_end_offset +
+			conn->sess->sess_ops->MaxBurstLength) >=
+			cmd->data_length) ? cmd->data_length :
+			(cmd->seq_end_offset +
+			 conn->sess->sess_ops->MaxBurstLength);
+	}
+}
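
Concretely, for a 600 KiB WRITE with a 256 KiB MaxBurstLength, successive calls advance the window through [0..256K), [256K..512K), [512K..600K). A sketch of that advance with hypothetical sizes:

	#include <stdio.h>

	int main(void)
	{
		unsigned int data_length = 614400;	/* 600 KiB */
		unsigned int mbl = 262144;		/* MaxBurstLength */
		unsigned int start, end = 0;

		while (end < data_length) {
			start = end;
			end = (start + mbl >= data_length) ?
				data_length : start + mbl;
			printf("DataOUT sequence window: [%u..%u)\n", start, end);
		}
		return 0;
	}
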
+
+static int iscsit_dataout_within_command_recovery_check(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * We do the within-command recovery checks here as it is
+	 * the first function called in iscsit_check_pre_dataout().
+	 * Basically, if we are in within-command recovery and
+	 * the PDU does not contain the offset the sequence needs,
+	 * dump the payload.
+	 *
+	 * This only applies to DataPDUInOrder=Yes, for
+	 * DataPDUInOrder=No we only re-request the failed PDU
+	 * and check that all PDUs in a sequence are received
+	 * upon end of sequence.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		if ((cmd->cmd_flags & ICF_WITHIN_COMMAND_RECOVERY) &&
+		    (cmd->write_data_done != hdr->offset))
+			goto dump;
+
+		cmd->cmd_flags &= ~ICF_WITHIN_COMMAND_RECOVERY;
+	} else {
+		struct iscsi_seq *seq;
+
+		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+		if (!seq)
+			return DATAOUT_CANNOT_RECOVER;
+		/*
+		 * Set the struct iscsi_seq pointer to reuse later.
+		 */
+		cmd->seq_ptr = seq;
+
+		if (conn->sess->sess_ops->DataPDUInOrder) {
+			if ((seq->status ==
+			     DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) &&
+			   ((seq->offset != hdr->offset) ||
+			    (seq->data_sn != hdr->datasn)))
+				goto dump;
+		} else {
+			if ((seq->status ==
+			     DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) &&
+			    (seq->data_sn != hdr->datasn))
+				goto dump;
+		}
+
+		if (seq->status == DATAOUT_SEQUENCE_COMPLETE)
+			goto dump;
+
+		if (seq->status != DATAOUT_SEQUENCE_COMPLETE)
+			seq->status = 0;
+	}
+
+	return DATAOUT_NORMAL;
+
+dump:
+	pr_err("Dumping DataOUT PDU Offset: %u Length: %d DataSN:"
+		" 0x%08x\n", hdr->offset, payload_length, hdr->datasn);
+	return iscsit_dump_data_payload(conn, payload_length, 1);
+}
+
+static int iscsit_dataout_check_unsolicited_sequence(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	u32 first_burst_len;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	if ((hdr->offset < cmd->seq_start_offset) ||
+	   ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
+		pr_err("Command ITT: 0x%08x with Offset: %u,"
+		" Length: %u outside of Unsolicited Sequence %u:%u while"
+		" DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
+		hdr->offset, payload_length, cmd->seq_start_offset,
+			cmd->seq_end_offset);
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	first_burst_len = (cmd->first_burst_len + payload_length);
+
+	if (first_burst_len > conn->sess->sess_ops->FirstBurstLength) {
+		pr_err("Total %u bytes exceeds FirstBurstLength: %u"
+			" for this Unsolicited DataOut Burst.\n",
+			first_burst_len, conn->sess->sess_ops->FirstBurstLength);
+		transport_send_check_condition_and_sense(&cmd->se_cmd,
+				TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	/*
+	 * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
+	 * checks for the current Unsolicited DataOUT Sequence.
+	 */
+	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+		/*
+		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No;
+		 * end-of-sequence checks are handled in
+		 * iscsit_dataout_datapduinorder_no_fbit().
+		 */
+		if (!conn->sess->sess_ops->DataPDUInOrder)
+			goto out;
+
+		if ((first_burst_len != cmd->data_length) &&
+		    (first_burst_len != conn->sess->sess_ops->FirstBurstLength)) {
+			pr_err("Unsolicited non-immediate data"
+			" received %u does not equal FirstBurstLength: %u, and"
+			" does not equal ExpXferLen %u.\n", first_burst_len,
+				conn->sess->sess_ops->FirstBurstLength,
+				cmd->data_length);
+			transport_send_check_condition_and_sense(&cmd->se_cmd,
+					TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+	} else {
+		if (first_burst_len == conn->sess->sess_ops->FirstBurstLength) {
+			pr_err("Command ITT: 0x%08x reached"
+			" FirstBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
+				" error.\n", cmd->init_task_tag,
+				conn->sess->sess_ops->FirstBurstLength);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+		if (first_burst_len == cmd->data_length) {
+			pr_err("Command ITT: 0x%08x reached"
+			" ExpXferLen: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
+			" error.\n", cmd->init_task_tag, cmd->data_length);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+	}
+
+out:
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_sequence(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	u32 next_burst_len;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_seq *seq = NULL;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * For DataSequenceInOrder=Yes: Check that the offset and offset+length
+	 * is within range as defined by iscsit_set_dataout_sequence_values().
+	 *
+	 * For DataSequenceInOrder=No: Check that a struct iscsi_seq exists for
+	 * the offset+length tuple.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		/*
+		 * Due to the possibility of recovery DataOUT being sent by
+		 * the initiator to fulfill a Recovery R2T, it is best to just
+		 * dump the payload here instead of erroring out.
+		 */
+		if ((hdr->offset < cmd->seq_start_offset) ||
+		   ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
+			pr_err("Command ITT: 0x%08x with Offset: %u,"
+			" Length: %u outside of Sequence %u:%u while"
+			" DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
+			hdr->offset, payload_length, cmd->seq_start_offset,
+				cmd->seq_end_offset);
+
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			return DATAOUT_WITHIN_COMMAND_RECOVERY;
+		}
+
+		next_burst_len = (cmd->next_burst_len + payload_length);
+	} else {
+		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+		if (!seq)
+			return DATAOUT_CANNOT_RECOVER;
+		/*
+		 * Set the struct iscsi_seq pointer to reuse later.
+		 */
+		cmd->seq_ptr = seq;
+
+		if (seq->status == DATAOUT_SEQUENCE_COMPLETE) {
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			return DATAOUT_WITHIN_COMMAND_RECOVERY;
+		}
+
+		next_burst_len = (seq->next_burst_len + payload_length);
+	}
+
+	if (next_burst_len > conn->sess->sess_ops->MaxBurstLength) {
+		pr_err("Command ITT: 0x%08x, NextBurstLength: %u and"
+			" Length: %u exceeds MaxBurstLength: %u. protocol"
+			" error.\n", cmd->init_task_tag,
+			(next_burst_len - payload_length),
+			payload_length, conn->sess->sess_ops->MaxBurstLength);
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	/*
+	 * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
+	 * checks for the current DataOUT Sequence.
+	 */
+	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+		/*
+		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No;
+		 * end-of-sequence checks are handled in
+		 * iscsit_dataout_datapduinorder_no_fbit().
+		 */
+		if (!conn->sess->sess_ops->DataPDUInOrder)
+			goto out;
+
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if ((next_burst_len <
+			     conn->sess->sess_ops->MaxBurstLength) &&
+			   ((cmd->write_data_done + payload_length) <
+			     cmd->data_length)) {
+				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+				" before end of DataOUT sequence, protocol"
+				" error.\n", cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		} else {
+			if (next_burst_len < seq->xfer_len) {
+				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+				" before end of DataOUT sequence, protocol"
+				" error.\n", cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		}
+	} else {
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if (next_burst_len ==
+					conn->sess->sess_ops->MaxBurstLength) {
+				pr_err("Command ITT: 0x%08x reached"
+				" MaxBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is"
+				" not set, protocol error.", cmd->init_task_tag,
+					conn->sess->sess_ops->MaxBurstLength);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+			if ((cmd->write_data_done + payload_length) ==
+					cmd->data_length) {
+				pr_err("Command ITT: 0x%08x reached"
+				" last DataOUT PDU in sequence but ISCSI_FLAG_"
+				"CMD_FINAL is not set, protocol error.\n",
+					cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		} else {
+			if (next_burst_len == seq->xfer_len) {
+				pr_err("Command ITT: 0x%08x reached"
+				" last DataOUT PDU in sequence but ISCSI_FLAG_"
+				"CMD_FINAL is not set, protocol error.\n",
+					cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		}
+	}
+
+out:
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_datasn(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int dump = 0, recovery = 0;
+	u32 data_sn = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * Considering the target has no method of re-requesting DataOUT
+	 * by DataSN, if we receive a greater DataSN than expected we
+	 * assume the functions for DataPDUInOrder=[Yes,No] below will
+	 * handle it.
+	 *
+	 * If the DataSN is less than expected, dump the payload.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder)
+		data_sn = cmd->data_sn;
+	else {
+		struct iscsi_seq *seq = cmd->seq_ptr;
+		data_sn = seq->data_sn;
+	}
+
+	if (hdr->datasn > data_sn) {
+		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
+			" higher than expected 0x%08x.\n", cmd->init_task_tag,
+				hdr->datasn, data_sn);
+		recovery = 1;
+		goto recover;
+	} else if (hdr->datasn < data_sn) {
+		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
+			" lower than expected 0x%08x, discarding payload.\n",
+			cmd->init_task_tag, hdr->datasn, data_sn);
+		dump = 1;
+		goto dump;
+	}
+
+	return DATAOUT_NORMAL;
+
+recover:
+	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+		pr_err("Unable to perform within-command recovery"
+				" while ERL=0.\n");
+		return DATAOUT_CANNOT_RECOVER;
+	}
+dump:
+	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+		return DATAOUT_CANNOT_RECOVER;
+
+	return (recovery || dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY :
+				DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_pre_datapduinorder_yes(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int dump = 0, recovery = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * For DataSequenceInOrder=Yes: If the offset is greater than the global
+	 * DataPDUInOrder=Yes offset counter in struct iscsi_cmd, a protocol
+	 * error has occurred; fail the connection.
+	 *
+	 * For DataSequenceInOrder=No: If the offset is greater than the per
+	 * sequence DataPDUInOrder=Yes offset counter in struct iscsi_seq, a
+	 * protocol error has occurred; fail the connection.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		if (hdr->offset != cmd->write_data_done) {
+			pr_err("Command ITT: 0x%08x, received offset"
+			" %u different than expected %u.\n", cmd->init_task_tag,
+				hdr->offset, cmd->write_data_done);
+			recovery = 1;
+			goto recover;
+		}
+	} else {
+		struct iscsi_seq *seq = cmd->seq_ptr;
+
+		if (hdr->offset > seq->offset) {
+			pr_err("Command ITT: 0x%08x, received offset"
+			" %u greater than expected %u.\n", cmd->init_task_tag,
+				hdr->offset, seq->offset);
+			recovery = 1;
+			goto recover;
+		} else if (hdr->offset < seq->offset) {
+			pr_err("Command ITT: 0x%08x, received offset"
+			" %u less than expected %u, discarding payload.\n",
+				cmd->init_task_tag, hdr->offset, seq->offset);
+			dump = 1;
+			goto dump;
+		}
+	}
+
+	return DATAOUT_NORMAL;
+
+recover:
+	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+		pr_err("Unable to perform within-command recovery"
+				" while ERL=0.\n");
+		return DATAOUT_CANNOT_RECOVER;
+	}
+dump:
+	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+		return DATAOUT_CANNOT_RECOVER;
+
+	return (recovery) ? iscsit_recover_dataout_sequence(cmd,
+		hdr->offset, payload_length) :
+	       (dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY : DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_pre_datapduinorder_no(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_pdu *pdu;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	pdu = iscsit_get_pdu_holder(cmd, hdr->offset, payload_length);
+	if (!pdu)
+		return DATAOUT_CANNOT_RECOVER;
+
+	cmd->pdu_ptr = pdu;
+
+	switch (pdu->status) {
+	case ISCSI_PDU_NOT_RECEIVED:
+	case ISCSI_PDU_CRC_FAILED:
+	case ISCSI_PDU_TIMED_OUT:
+		break;
+	case ISCSI_PDU_RECEIVED_OK:
+		pr_err("Command ITT: 0x%08x received already gotten"
+			" Offset: %u, Length: %u\n", cmd->init_task_tag,
+				hdr->offset, payload_length);
+		return iscsit_dump_data_payload(cmd->conn, payload_length, 1);
+	default:
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_update_r2t(struct iscsi_cmd *cmd, u32 offset, u32 length)
+{
+	struct iscsi_r2t *r2t;
+
+	if (cmd->unsolicited_data)
+		return 0;
+
+	r2t = iscsit_get_r2t_for_eos(cmd, offset, length);
+	if (!r2t)
+		return -1;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	r2t->seq_complete = 1;
+	cmd->outstanding_r2ts--;
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	return 0;
+}
+
+static int iscsit_dataout_update_datapduinorder_no(
+	struct iscsi_cmd *cmd,
+	u32 data_sn,
+	int f_bit)
+{
+	int ret = 0;
+	struct iscsi_pdu *pdu = cmd->pdu_ptr;
+
+	pdu->data_sn = data_sn;
+
+	switch (pdu->status) {
+	case ISCSI_PDU_NOT_RECEIVED:
+	case ISCSI_PDU_CRC_FAILED:
+	case ISCSI_PDU_TIMED_OUT:
+		pdu->status = ISCSI_PDU_RECEIVED_OK;
+		break;
+	default:
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	if (f_bit) {
+		ret = iscsit_dataout_datapduinorder_no_fbit(cmd, pdu);
+		if (ret == DATAOUT_CANNOT_RECOVER)
+			return ret;
+	}
+
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_post_crc_passed(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int ret, send_r2t = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_seq *seq = NULL;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	if (cmd->unsolicited_data) {
+		if ((cmd->first_burst_len + payload_length) ==
+		     conn->sess->sess_ops->FirstBurstLength) {
+			if (iscsit_dataout_update_r2t(cmd, hdr->offset,
+					payload_length) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			send_r2t = 1;
+		}
+
+		if (!conn->sess->sess_ops->DataPDUInOrder) {
+			ret = iscsit_dataout_update_datapduinorder_no(cmd,
+				hdr->datasn, (hdr->flags & ISCSI_FLAG_CMD_FINAL));
+			if (ret == DATAOUT_CANNOT_RECOVER)
+				return ret;
+		}
+
+		cmd->first_burst_len += payload_length;
+
+		if (conn->sess->sess_ops->DataSequenceInOrder)
+			cmd->data_sn++;
+		else {
+			seq = cmd->seq_ptr;
+			seq->data_sn++;
+			seq->offset += payload_length;
+		}
+
+		if (send_r2t) {
+			if (seq)
+				seq->status = DATAOUT_SEQUENCE_COMPLETE;
+			cmd->first_burst_len = 0;
+			cmd->unsolicited_data = 0;
+		}
+	} else {
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if ((cmd->next_burst_len + payload_length) ==
+			     conn->sess->sess_ops->MaxBurstLength) {
+				if (iscsit_dataout_update_r2t(cmd, hdr->offset,
+						payload_length) < 0)
+					return DATAOUT_CANNOT_RECOVER;
+				send_r2t = 1;
+			}
+
+			if (!conn->sess->sess_ops->DataPDUInOrder) {
+				ret = iscsit_dataout_update_datapduinorder_no(
+						cmd, hdr->datasn,
+						(hdr->flags & ISCSI_FLAG_CMD_FINAL));
+				if (ret == DATAOUT_CANNOT_RECOVER)
+					return ret;
+			}
+
+			cmd->next_burst_len += payload_length;
+			cmd->data_sn++;
+
+			if (send_r2t)
+				cmd->next_burst_len = 0;
+		} else {
+			seq = cmd->seq_ptr;
+
+			if ((seq->next_burst_len + payload_length) ==
+			     seq->xfer_len) {
+				if (iscsit_dataout_update_r2t(cmd, hdr->offset,
+						payload_length) < 0)
+					return DATAOUT_CANNOT_RECOVER;
+				send_r2t = 1;
+			}
+
+			if (!conn->sess->sess_ops->DataPDUInOrder) {
+				ret = iscsit_dataout_update_datapduinorder_no(
+						cmd, hdr->datasn,
+						(hdr->flags & ISCSI_FLAG_CMD_FINAL));
+				if (ret == DATAOUT_CANNOT_RECOVER)
+					return ret;
+			}
+
+			seq->data_sn++;
+			seq->offset += payload_length;
+			seq->next_burst_len += payload_length;
+
+			if (send_r2t) {
+				seq->next_burst_len = 0;
+				seq->status = DATAOUT_SEQUENCE_COMPLETE;
+			}
+		}
+	}
+
+	if (send_r2t && conn->sess->sess_ops->DataSequenceInOrder)
+		cmd->data_sn = 0;
+
+	cmd->write_data_done += payload_length;
+
+	return (cmd->write_data_done == cmd->data_length) ?
+		DATAOUT_SEND_TO_TRANSPORT : (send_r2t) ?
+		DATAOUT_SEND_R2T : DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_post_crc_failed(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_pdu *pdu;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	if (conn->sess->sess_ops->DataPDUInOrder)
+		goto recover;
+	/*
+	 * The rest of this function is only called when DataPDUInOrder=No.
+	 */
+	pdu = cmd->pdu_ptr;
+
+	switch (pdu->status) {
+	case ISCSI_PDU_NOT_RECEIVED:
+		pdu->status = ISCSI_PDU_CRC_FAILED;
+		break;
+	case ISCSI_PDU_CRC_FAILED:
+		break;
+	case ISCSI_PDU_TIMED_OUT:
+		pdu->status = ISCSI_PDU_CRC_FAILED;
+		break;
+	default:
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+recover:
+	return iscsit_recover_dataout_sequence(cmd, hdr->offset, payload_length);
+}
+
+/*
+ *	Called from iscsit_handle_data_out() before DataOUT Payload is received
+ *	and CRC computed.
+ */
+int iscsit_check_pre_dataout(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int ret;
+	struct iscsi_conn *conn = cmd->conn;
+
+	ret = iscsit_dataout_within_command_recovery_check(cmd, buf);
+	if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+	    (ret == DATAOUT_CANNOT_RECOVER))
+		return ret;
+
+	ret = iscsit_dataout_check_datasn(cmd, buf);
+	if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+	    (ret == DATAOUT_CANNOT_RECOVER))
+		return ret;
+
+	if (cmd->unsolicited_data) {
+		ret = iscsit_dataout_check_unsolicited_sequence(cmd, buf);
+		if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+		    (ret == DATAOUT_CANNOT_RECOVER))
+			return ret;
+	} else {
+		ret = iscsit_dataout_check_sequence(cmd, buf);
+		if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+		    (ret == DATAOUT_CANNOT_RECOVER))
+			return ret;
+	}
+
+	return (conn->sess->sess_ops->DataPDUInOrder) ?
+		iscsit_dataout_pre_datapduinorder_yes(cmd, buf) :
+		iscsit_dataout_pre_datapduinorder_no(cmd, buf);
+}
+
+/*
+ *	Called from iscsit_handle_data_out() after DataOUT Payload is received
+ *	and CRC computed.
+ */
+int iscsit_check_post_dataout(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf,
+	u8 data_crc_failed)
+{
+	struct iscsi_conn *conn = cmd->conn;
+
+	cmd->dataout_timeout_retries = 0;
+
+	if (!data_crc_failed)
+		return iscsit_dataout_post_crc_passed(cmd, buf);
+	else {
+		if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+			pr_err("Unable to recover from DataOUT CRC"
+				" failure while ERL=0, closing session.\n");
+			iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+					1, 0, buf, cmd);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+
+		iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+				0, 0, buf, cmd);
+		return iscsit_dataout_post_crc_failed(cmd, buf);
+	}
+}
+
+static void iscsit_handle_time2retain_timeout(unsigned long data)
+{
+	struct iscsi_session *sess = (struct iscsi_session *) data;
+	struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+	spin_lock_bh(&se_tpg->session_lock);
+	if (sess->time2retain_timer_flags & ISCSI_TF_STOP) {
+		spin_unlock_bh(&se_tpg->session_lock);
+		return;
+	}
+	if (atomic_read(&sess->session_reinstatement)) {
+		pr_err("Exiting Time2Retain handler because"
+				" session_reinstatement=1\n");
+		spin_unlock_bh(&se_tpg->session_lock);
+		return;
+	}
+	sess->time2retain_timer_flags |= ISCSI_TF_EXPIRED;
+
+	pr_err("Time2Retain timer expired for SID: %u, cleaning up"
+			" iSCSI session.\n", sess->sid);
+	{
+	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+	if (tiqn) {
+		spin_lock(&tiqn->sess_err_stats.lock);
+		strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+			(void *)sess->sess_ops->InitiatorName);
+		tiqn->sess_err_stats.last_sess_failure_type =
+				ISCSI_SESS_ERR_CXN_TIMEOUT;
+		tiqn->sess_err_stats.cxn_timeout_errors++;
+		sess->conn_timeout_errors++;
+		spin_unlock(&tiqn->sess_err_stats.lock);
+	}
+	}
+
+	spin_unlock_bh(&se_tpg->session_lock);
+	iscsit_close_session(sess);
+}
+
+extern void iscsit_start_time2retain_handler(struct iscsi_session *sess)
+{
+	int tpg_active;
+	/*
+	 * Only start the Time2Retain timer when the associated TPG is still in
+	 * an ACTIVE (e.g. not disabled or shutdown) state.
+	 */
+	spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock);
+	tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE);
+	spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock);
+
+	if (!tpg_active)
+		return;
+
+	if (sess->time2retain_timer_flags & ISCSI_TF_RUNNING)
+		return;
+
+	pr_debug("Starting Time2Retain timer for %u seconds on"
+		" SID: %u\n", sess->sess_ops->DefaultTime2Retain, sess->sid);
+
+	init_timer(&sess->time2retain_timer);
+	sess->time2retain_timer.expires =
+		(get_jiffies_64() + sess->sess_ops->DefaultTime2Retain * HZ);
+	sess->time2retain_timer.data = (unsigned long)sess;
+	sess->time2retain_timer.function = iscsit_handle_time2retain_timeout;
+	sess->time2retain_timer_flags &= ~ISCSI_TF_STOP;
+	sess->time2retain_timer_flags |= ISCSI_TF_RUNNING;
+	add_timer(&sess->time2retain_timer);
+}
+
+/*
+ *	Called with spin_lock_bh(&struct se_portal_group->session_lock) held
+ */
+extern int iscsit_stop_time2retain_timer(struct iscsi_session *sess)
+{
+	struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+	if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)
+		return -1;
+
+	if (!(sess->time2retain_timer_flags & ISCSI_TF_RUNNING))
+		return 0;
+
+	sess->time2retain_timer_flags |= ISCSI_TF_STOP;
+	spin_unlock_bh(&se_tpg->session_lock);
+
+	del_timer_sync(&sess->time2retain_timer);
+
+	spin_lock_bh(&se_tpg->session_lock);
+	sess->time2retain_timer_flags &= ~ISCSI_TF_RUNNING;
+	pr_debug("Stopped Time2Retain Timer for SID: %u\n",
+			sess->sid);
+	return 0;
+}
+
+void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->state_lock);
+	if (atomic_read(&conn->connection_exit)) {
+		spin_unlock_bh(&conn->state_lock);
+		goto sleep;
+	}
+
+	if (atomic_read(&conn->transport_failed)) {
+		spin_unlock_bh(&conn->state_lock);
+		goto sleep;
+	}
+	spin_unlock_bh(&conn->state_lock);
+
+	iscsi_thread_set_force_reinstatement(conn);
+
+sleep:
+	wait_for_completion(&conn->conn_wait_rcfr_comp);
+	complete(&conn->conn_post_wait_comp);
+}
+
+void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep)
+{
+	spin_lock_bh(&conn->state_lock);
+	if (atomic_read(&conn->connection_exit)) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	if (atomic_read(&conn->transport_failed)) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	if (atomic_read(&conn->connection_reinstatement)) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	if (iscsi_thread_set_force_reinstatement(conn) < 0) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	atomic_set(&conn->connection_reinstatement, 1);
+	if (!sleep) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	atomic_set(&conn->sleep_on_conn_wait_comp, 1);
+	spin_unlock_bh(&conn->state_lock);
+
+	wait_for_completion(&conn->conn_wait_comp);
+	complete(&conn->conn_post_wait_comp);
+}
+
+void iscsit_fall_back_to_erl0(struct iscsi_session *sess)
+{
+	pr_debug("Falling back to ErrorRecoveryLevel=0 for SID:"
+			" %u\n", sess->sid);
+
+	atomic_set(&sess->session_fall_back_to_erl0, 1);
+}
+
+static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+
+	if ((sess->sess_ops->ErrorRecoveryLevel == 2) &&
+	    !atomic_read(&sess->session_reinstatement) &&
+	    !atomic_read(&sess->session_fall_back_to_erl0))
+		iscsit_connection_recovery_transport_reset(conn);
+	else {
+		pr_debug("Performing cleanup for failed iSCSI"
+			" Connection ID: %hu from %s\n", conn->cid,
+			sess->sess_ops->InitiatorName);
+		iscsit_close_connection(conn);
+	}
+}
+
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->state_lock);
+	if (atomic_read(&conn->connection_exit)) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+	atomic_set(&conn->connection_exit, 1);
+
+	if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
+		spin_unlock_bh(&conn->state_lock);
+		iscsit_close_connection(conn);
+		return;
+	}
+
+	if (conn->conn_state == TARG_CONN_STATE_CLEANUP_WAIT) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
+	conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
+	spin_unlock_bh(&conn->state_lock);
+
+	iscsit_handle_connection_cleanup(conn);
+}
+
+/*
+ *	This is the simple function that makes the magic of
+ *	sync and steering happen in the following paradoxical order:
+ *
+ *	0) Receive conn->of_marker (bytes left until next OFMarker)
+ *	   bytes into an offload buffer.  When we pass the exact number
+ *	   of bytes in conn->of_marker, iscsit_dump_data_payload() and hence
+ *	   rx_data() will automatically receive the identical u32 marker
+ *	   values and store them in conn->of_marker_offset;
+ *	1) Now conn->of_marker_offset will contain the offset to the start
+ *	   of the next iSCSI PDU.  Dump these remaining bytes into another
+ *	   offload buffer.
+ *	2) We are done!
+ *	   Next byte in the TCP stream will contain the next iSCSI PDU!
+ *	   Cool Huh?!
+ */
+int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn)
+{
+	/*
+	 * Make sure the remaining bytes to the next marker are a sane value.
+	 */
+	if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) {
+		pr_err("Remaining bytes to OFMarker: %u exceeds"
+			" OFMarkInt bytes: %u.\n", conn->of_marker,
+				conn->conn_ops->OFMarkInt * 4);
+		return -1;
+	}
+
+	pr_debug("Advancing %u bytes in TCP stream to get to the"
+			" next OFMarker.\n", conn->of_marker);
+
+	if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0)
+		return -1;
+
+	/*
+	 * Make sure the offset marker we retrieved is a valid value.
+	 */
+	if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) +
+	    conn->conn_ops->MaxRecvDataSegmentLength)) {
+		pr_err("OfMarker offset value: %u exceeds limit.\n",
+			conn->of_marker_offset);
+		return -1;
+	}
+
+	pr_debug("Discarding %u bytes of TCP stream to get to the"
+			" next iSCSI Opcode.\n", conn->of_marker_offset);
+
+	if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0)
+		return -1;
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h
new file mode 100644
index 0000000..21acc9a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl0.h
@@ -0,0 +1,15 @@
+#ifndef ISCSI_TARGET_ERL0_H
+#define ISCSI_TARGET_ERL0_H
+
+extern void iscsit_set_dataout_sequence_values(struct iscsi_cmd *);
+extern int iscsit_check_pre_dataout(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_check_post_dataout(struct iscsi_cmd *, unsigned char *, u8);
+extern void iscsit_start_time2retain_handler(struct iscsi_session *);
+extern int iscsit_stop_time2retain_timer(struct iscsi_session *);
+extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *);
+extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int);
+extern void iscsit_fall_back_to_erl0(struct iscsi_session *);
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *);
+extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *);
+
+#endif   /*** ISCSI_TARGET_ERL0_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
new file mode 100644
index 0000000..9806507
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -0,0 +1,1299 @@
+/*******************************************************************************
+ * This file contains error recovery level one used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+#define OFFLOAD_BUF_SIZE	32768
+
+/*
+ *	Used to dump excess datain payload for certain error recovery
+ *	situations.  Receives at most OFFLOAD_BUF_SIZE of data per rx_data() call.
+ *
+ *	dump_padding_digest denotes if padding and data digests need
+ *	to be dumped.
+ */
+int iscsit_dump_data_payload(
+	struct iscsi_conn *conn,
+	u32 buf_len,
+	int dump_padding_digest)
+{
+	char *buf, pad_bytes[4];
+	int ret = DATAOUT_WITHIN_COMMAND_RECOVERY, rx_got;
+	u32 length, padding, offset = 0, size;
+	struct kvec iov;
+
+	length = (buf_len > OFFLOAD_BUF_SIZE) ? OFFLOAD_BUF_SIZE : buf_len;
+
+	buf = kzalloc(length, GFP_ATOMIC);
+	if (!buf) {
+		pr_err("Unable to allocate %u bytes for offload"
+				" buffer.\n", length);
+		return -1;
+	}
+	memset(&iov, 0, sizeof(struct kvec));
+
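+	/*
+	 * Drain buf_len bytes from the TCP stream in OFFLOAD_BUF_SIZE
+	 * chunks; the received data is discarded, the point is only to
+	 * advance the stream to the next PDU boundary.
+	 */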
+	while (offset < buf_len) {
+		size = ((offset + length) > buf_len) ?
+			(buf_len - offset) : length;
+
+		iov.iov_len = size;
+		iov.iov_base = buf;
+
+		rx_got = rx_data(conn, &iov, 1, size);
+		if (rx_got != size) {
+			ret = DATAOUT_CANNOT_RECOVER;
+			goto out;
+		}
+
+		offset += size;
+	}
+
+	if (!dump_padding_digest)
+		goto out;
+
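+	/*
+	 * ((-buf_len) & 3) is the number of pad bytes needed to round the
+	 * payload up to a 4-byte boundary, e.g. buf_len == 13 gives 3.
+	 */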
+	padding = ((-buf_len) & 3);
+	if (padding != 0) {
+		iov.iov_len = padding;
+		iov.iov_base = pad_bytes;
+
+		rx_got = rx_data(conn, &iov, 1, padding);
+		if (rx_got != padding) {
+			ret = DATAOUT_CANNOT_RECOVER;
+			goto out;
+		}
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		u32 data_crc;
+
+		iov.iov_len = ISCSI_CRC_LEN;
+		iov.iov_base = &data_crc;
+
+		rx_got = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+		if (rx_got != ISCSI_CRC_LEN) {
+			ret = DATAOUT_CANNOT_RECOVER;
+			goto out;
+		}
+	}
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+/*
+ *	Used for retransmitting R2Ts from a R2T SNACK request.
+ */
+static int iscsit_send_recovery_r2t_for_snack(
+	struct iscsi_cmd *cmd,
+	struct iscsi_r2t *r2t)
+{
+	/*
+	 * If the struct iscsi_r2t has not been sent yet, we can safely
+	 * ignore retransmission of the R2TSN in question.
+	 */
+	spin_lock_bh(&cmd->r2t_lock);
+	if (!r2t->sent_r2t) {
+		spin_unlock_bh(&cmd->r2t_lock);
+		return 0;
+	}
+	r2t->sent_r2t = 0;
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T);
+
+	return 0;
+}
+
+static int iscsit_handle_r2t_snack(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf,
+	u32 begrun,
+	u32 runlength)
+{
+	u32 last_r2tsn;
+	struct iscsi_r2t *r2t;
+
+	/*
+	 * Make sure the initiator is not requesting retransmission
+	 * of R2TSNs already acknowledged by a TMR TASK_REASSIGN.
+	 */
+	if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+	    (begrun <= cmd->acked_data_sn)) {
+		pr_err("ITT: 0x%08x, R2T SNACK requesting"
+			" retransmission of R2TSN: 0x%08x to 0x%08x but already"
+			" acked to  R2TSN: 0x%08x by TMR TASK_REASSIGN,"
+			" protocol error.\n", cmd->init_task_tag, begrun,
+			(begrun + runlength), cmd->acked_data_sn);
+
+			return iscsit_add_reject_from_cmd(
+					ISCSI_REASON_PROTOCOL_ERROR,
+					1, 0, buf, cmd);
+	}
+
+	if (runlength) {
+		if ((begrun + runlength) > cmd->r2t_sn) {
+			pr_err("Command ITT: 0x%08x received R2T SNACK"
+			" with BegRun: 0x%08x, RunLength: 0x%08x, exceeds"
+			" current R2TSN: 0x%08x, protocol error.\n",
+			cmd->init_task_tag, begrun, runlength, cmd->r2t_sn);
+			return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd);
+		}
+		last_r2tsn = (begrun + runlength);
+	} else
+		last_r2tsn = cmd->r2t_sn;
+
+	while (begrun < last_r2tsn) {
+		r2t = iscsit_get_holder_for_r2tsn(cmd, begrun);
+		if (!r2t)
+			return -1;
+		if (iscsit_send_recovery_r2t_for_snack(cmd, r2t) < 0)
+			return -1;
+
+		begrun++;
+	}
+
+	return 0;
+}
+
+/*
+ *	Generates Offsets and NextBurstLength based on BegRun and RunLength
+ *	carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ *	For DataSequenceInOrder=Yes and DataPDUInOrder=[Yes,No] only.
+ *
+ *	FIXME: How is this handled for a RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain_req *dr)
+{
+	u32 data_sn = 0, data_sn_count = 0;
+	u32 pdu_start = 0, seq_no = 0;
+	u32 begrun = dr->begrun;
+	struct iscsi_conn *conn = cmd->conn;
+
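+	/*
+	 * Walk DataSNs up to BegRun, accumulating read_data_done in
+	 * MaxRecvDataSegmentLength sized chunks and tracking the sequence
+	 * number and PDU offsets where retransmission must restart.
+	 */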
+	while (begrun > data_sn++) {
+		data_sn_count++;
+		if ((dr->next_burst_len +
+		     conn->conn_ops->MaxRecvDataSegmentLength) <
+		     conn->sess->sess_ops->MaxBurstLength) {
+			dr->read_data_done +=
+				conn->conn_ops->MaxRecvDataSegmentLength;
+			dr->next_burst_len +=
+				conn->conn_ops->MaxRecvDataSegmentLength;
+		} else {
+			dr->read_data_done +=
+				(conn->sess->sess_ops->MaxBurstLength -
+				 dr->next_burst_len);
+			dr->next_burst_len = 0;
+			pdu_start += data_sn_count;
+			data_sn_count = 0;
+			seq_no++;
+		}
+	}
+
+	if (!conn->sess->sess_ops->DataPDUInOrder) {
+		cmd->seq_no = seq_no;
+		cmd->pdu_start = pdu_start;
+		cmd->pdu_send_order = data_sn_count;
+	}
+
+	return 0;
+}
+
+/*
+ *	Generates Offsets and NextBurstLength based on BegRun and RunLength
+ *	carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ *	For DataSequenceInOrder=No and DataPDUInOrder=[Yes,No] only.
+ *
+ *	FIXME: How is this handled for a RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain_req *dr)
+{
+	int found_seq = 0, i;
+	u32 data_sn, read_data_done = 0, seq_send_order = 0;
+	u32 begrun = dr->begrun;
+	u32 runlength = dr->runlength;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_seq *first_seq = NULL, *seq = NULL;
+
+	if (!cmd->seq_list) {
+		pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+		return -1;
+	}
+
+	/*
+	 * Calculate read_data_done for all sequences containing a
+	 * first_datasn and last_datasn less than the BegRun.
+	 *
+	 * Locate the struct iscsi_seq the BegRun lies within and calculate
+	 * NextBurstLength up to the DataSN based on MaxRecvDataSegmentLength.
+	 *
+	 * Also use struct iscsi_seq->seq_send_order to determine where to start.
+	 */
+	for (i = 0; i < cmd->seq_count; i++) {
+		seq = &cmd->seq_list[i];
+
+		if (!seq->seq_send_order)
+			first_seq = seq;
+
+		/*
+		 * No data has been transferred for this DataIN sequence, so the
+		 * seq->first_datasn and seq->last_datasn have not been set.
+		 */
+		if (!seq->sent) {
+#if 0
+			pr_err("Ignoring non-sent sequence 0x%08x ->"
+				" 0x%08x\n\n", seq->first_datasn,
+				seq->last_datasn);
+#endif
+			continue;
+		}
+
+		/*
+		 * This DataIN sequence precedes the received BegRun; add the
+		 * total xfer_len of the sequence to read_data_done and reset
+		 * seq->pdu_send_order.
+		 */
+		if ((seq->first_datasn < begrun) &&
+				(seq->last_datasn < begrun)) {
+#if 0
+			pr_err("Pre BegRun sequence 0x%08x ->"
+				" 0x%08x\n", seq->first_datasn,
+				seq->last_datasn);
+#endif
+			read_data_done += cmd->seq_list[i].xfer_len;
+			seq->next_burst_len = seq->pdu_send_order = 0;
+			continue;
+		}
+
+		/*
+		 * The BegRun lies within this DataIN sequence.
+		 */
+		if ((seq->first_datasn <= begrun) &&
+				(seq->last_datasn >= begrun)) {
+#if 0
+			pr_err("Found sequence begrun: 0x%08x in"
+				" 0x%08x -> 0x%08x\n", begrun,
+				seq->first_datasn, seq->last_datasn);
+#endif
+			seq_send_order = seq->seq_send_order;
+			data_sn = seq->first_datasn;
+			seq->next_burst_len = seq->pdu_send_order = 0;
+			found_seq = 1;
+
+			/*
+			 * For DataPDUInOrder=Yes, while the first DataSN of
+			 * the sequence is less than the received BegRun, add
+			 * the MaxRecvDataSegmentLength to read_data_done and
+			 * to the sequence's next_burst_len;
+			 *
+			 * For DataPDUInOrder=No, while the first DataSN of the
+			 * sequence is less than the received BegRun, find the
+			 * struct iscsi_pdu of the DataSN in question and add the
+			 * MaxRecvDataSegmentLength to read_data_done and to the
+			 * sequence's next_burst_len;
+			 */
+			if (conn->sess->sess_ops->DataPDUInOrder) {
+				while (data_sn < begrun) {
+					seq->pdu_send_order++;
+					read_data_done +=
+						conn->conn_ops->MaxRecvDataSegmentLength;
+					seq->next_burst_len +=
+						conn->conn_ops->MaxRecvDataSegmentLength;
+					data_sn++;
+				}
+			} else {
+				int j;
+				struct iscsi_pdu *pdu;
+
+				while (data_sn < begrun) {
+					seq->pdu_send_order++;
+
+					for (j = 0; j < seq->pdu_count; j++) {
+						pdu = &cmd->pdu_list[
+							seq->pdu_start + j];
+						if (pdu->data_sn == data_sn) {
+							read_data_done +=
+								pdu->length;
+							seq->next_burst_len +=
+								pdu->length;
+						}
+					}
+					data_sn++;
+				}
+			}
+			continue;
+		}
+
+		/*
+		 * This DataIN sequence follows the received BegRun, so
+		 * reset seq->pdu_send_order and continue.
+		 */
+		if ((seq->first_datasn > begrun) ||
+				(seq->last_datasn > begrun)) {
+#if 0
+			pr_err("Post BegRun sequence 0x%08x -> 0x%08x\n",
+					seq->first_datasn, seq->last_datasn);
+#endif
+			seq->next_burst_len = seq->pdu_send_order = 0;
+			continue;
+		}
+	}
+
+	if (!found_seq) {
+		if (!begrun) {
+			if (!first_seq) {
+				pr_err("ITT: 0x%08x, Begrun: 0x%08x"
+					" but first_seq is NULL\n",
+					cmd->init_task_tag, begrun);
+				return -1;
+			}
+			seq_send_order = first_seq->seq_send_order;
+			seq->next_burst_len = seq->pdu_send_order = 0;
+			goto done;
+		}
+
+		pr_err("Unable to locate struct iscsi_seq for ITT: 0x%08x,"
+			" BegRun: 0x%08x, RunLength: 0x%08x while"
+			" DataSequenceInOrder=No and DataPDUInOrder=%s.\n",
+				cmd->init_task_tag, begrun, runlength,
+			(conn->sess->sess_ops->DataPDUInOrder) ? "Yes" : "No");
+		return -1;
+	}
+
+done:
+	dr->read_data_done = read_data_done;
+	dr->seq_send_order = seq_send_order;
+
+	return 0;
+}
+
+static int iscsit_handle_recovery_datain(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_datain_req *dr;
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+
+	if (!atomic_read(&se_cmd->t_transport_complete)) {
+		pr_err("Ignoring ITT: 0x%08x Data SNACK\n",
+				cmd->init_task_tag);
+		return 0;
+	}
+
+	/*
+	 * Make sure the initiator is not requesting retransmission
+	 * of DataSNs already acknowledged by a Data ACK SNACK.
+	 */
+	if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+	    (begrun <= cmd->acked_data_sn)) {
+		pr_err("ITT: 0x%08x, Data SNACK requesting"
+			" retransmission of DataSN: 0x%08x to 0x%08x but"
+			" already acked to DataSN: 0x%08x by Data ACK SNACK,"
+			" protocol error.\n", cmd->init_task_tag, begrun,
+			(begrun + runlength), cmd->acked_data_sn);
+
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR,
+				1, 0, buf, cmd);
+	}
+
+	/*
+	 * Make sure BegRun and RunLength in the Data SNACK are sane.
+	 * Note: (cmd->data_sn - 1) will carry the maximum DataSN sent.
+	 */
+	if ((begrun + runlength) > (cmd->data_sn - 1)) {
+		pr_err("Initiator requesting BegRun: 0x%08x, RunLength"
+			": 0x%08x greater than maximum DataSN: 0x%08x.\n",
+				begrun, runlength, (cmd->data_sn - 1));
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+				1, 0, buf, cmd);
+	}
+
+	dr = iscsit_allocate_datain_req();
+	if (!dr)
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+				1, 0, buf, cmd);
+
+	dr->data_sn = dr->begrun = begrun;
+	dr->runlength = runlength;
+	dr->generate_recovery_values = 1;
+	dr->recovery = DATAIN_WITHIN_COMMAND_RECOVERY;
+
+	iscsit_attach_datain_req(cmd, dr);
+
+	cmd->i_state = ISTATE_SEND_DATAIN;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	return 0;
+}
+
+int iscsit_handle_recovery_datain_or_r2t(
+	struct iscsi_conn *conn,
+	unsigned char *buf,
+	u32 init_task_tag,
+	u32 targ_xfer_tag,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_cmd *cmd;
+
+	cmd = iscsit_find_cmd_from_itt(conn, init_task_tag);
+	if (!cmd)
+		return 0;
+
+	/*
+	 * FIXME: This will not work for bidi commands.
+	 */
+	switch (cmd->data_direction) {
+	case DMA_TO_DEVICE:
+		return iscsit_handle_r2t_snack(cmd, buf, begrun, runlength);
+	case DMA_FROM_DEVICE:
+		return iscsit_handle_recovery_datain(cmd, buf, begrun,
+				runlength);
+	default:
+		pr_err("Unknown cmd->data_direction: 0x%02x\n",
+				cmd->data_direction);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* #warning FIXME: Status SNACK needs to be dependent on OPCODE!!! */
+int iscsit_handle_status_snack(
+	struct iscsi_conn *conn,
+	u32 init_task_tag,
+	u32 targ_xfer_tag,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_cmd *cmd = NULL;
+	u32 last_statsn;
+	int found_cmd;
+
+	if (conn->exp_statsn > begrun) {
+		pr_err("Got Status SNACK Begrun: 0x%08x, RunLength:"
+			" 0x%08x but already got ExpStatSN: 0x%08x on CID:"
+			" %hu.\n", begrun, runlength, conn->exp_statsn,
+			conn->cid);
+		return 0;
+	}
+
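+	/*
+	 * Per the SNACK semantics of RFC 3720, a RunLength of zero requests
+	 * retransmission of every StatSN from BegRun up to the connection's
+	 * current StatSN.
+	 */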
+	last_statsn = (!runlength) ? conn->stat_sn : (begrun + runlength);
+
+	while (begrun < last_statsn) {
+		found_cmd = 0;
+
+		spin_lock_bh(&conn->cmd_lock);
+		list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+			if (cmd->stat_sn == begrun) {
+				found_cmd = 1;
+				break;
+			}
+		}
+		spin_unlock_bh(&conn->cmd_lock);
+
+		if (!found_cmd) {
+			pr_err("Unable to find StatSN: 0x%08x for"
+				" a Status SNACK, assuming this was a"
+				" protactic SNACK for an untransmitted"
+				" StatSN, ignoring.\n", begrun);
+			begrun++;
+			continue;
+		}
+
+		spin_lock_bh(&cmd->istate_lock);
+		if (cmd->i_state == ISTATE_SEND_DATAIN) {
+			spin_unlock_bh(&cmd->istate_lock);
+			pr_err("Ignoring Status SNACK for BegRun:"
+				" 0x%08x, RunLength: 0x%08x, assuming this was"
+				" a protactic SNACK for an untransmitted"
+				" StatSN\n", begrun, runlength);
+			begrun++;
+			continue;
+		}
+		spin_unlock_bh(&cmd->istate_lock);
+
+		cmd->i_state = ISTATE_SEND_STATUS_RECOVERY;
+		iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+		begrun++;
+	}
+
+	return 0;
+}
+
+int iscsit_handle_data_ack(
+	struct iscsi_conn *conn,
+	u32 targ_xfer_tag,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_cmd *cmd = NULL;
+
+	cmd = iscsit_find_cmd_from_ttt(conn, targ_xfer_tag);
+	if (!cmd) {
+		pr_err("Data ACK SNACK for TTT: 0x%08x is"
+			" invalid.\n", targ_xfer_tag);
+		return -1;
+	}
+
+	if (begrun <= cmd->acked_data_sn) {
+		pr_err("ITT: 0x%08x Data ACK SNACK BegRUN: 0x%08x is"
+			" less than the already acked DataSN: 0x%08x.\n",
+			cmd->init_task_tag, begrun, cmd->acked_data_sn);
+		return -1;
+	}
+
+	/*
+	 * For Data ACK SNACK, BegRun is the next expected DataSN.
+	 * (see iSCSI v19: 10.16.6)
+	 */
+	cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+	cmd->acked_data_sn = (begrun - 1);
+
+	pr_debug("Received Data ACK SNACK for ITT: 0x%08x,"
+		" updated acked DataSN to 0x%08x.\n",
+			cmd->init_task_tag, cmd->acked_data_sn);
+
+	return 0;
+}
+
+static int iscsit_send_recovery_r2t(
+	struct iscsi_cmd *cmd,
+	u32 offset,
+	u32 xfer_len)
+{
+	int ret;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	ret = iscsit_add_r2t_to_list(cmd, offset, xfer_len, 1, 0);
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	return ret;
+}
+
+int iscsit_dataout_datapduinorder_no_fbit(
+	struct iscsi_cmd *cmd,
+	struct iscsi_pdu *pdu)
+{
+	int i, send_recovery_r2t = 0, recovery = 0;
+	u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_pdu *first_pdu = NULL;
+
+	/*
+	 * Get a struct iscsi_pdu pointer to the first PDU, and total PDU count
+	 * of the DataOUT sequence.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		for (i = 0; i < cmd->pdu_count; i++) {
+			if (cmd->pdu_list[i].seq_no == pdu->seq_no) {
+				if (!first_pdu)
+					first_pdu = &cmd->pdu_list[i];
+				xfer_len += cmd->pdu_list[i].length;
+				pdu_count++;
+			} else if (pdu_count)
+				break;
+		}
+	} else {
+		struct iscsi_seq *seq = cmd->seq_ptr;
+
+		first_pdu = &cmd->pdu_list[seq->pdu_start];
+		pdu_count = seq->pdu_count;
+	}
+
+	if (!first_pdu || !pdu_count)
+		return DATAOUT_CANNOT_RECOVER;
+
+	/*
+	 * Loop through the ending DataOUT Sequence checking each struct iscsi_pdu.
+	 * The following ugly logic batches runs of PDUs that were not received.
+	 */
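+	/*
+	 * For example, two consecutive ISCSI_PDU_NOT_RECEIVED entries are
+	 * coalesced into a single recovery R2T spanning their combined
+	 * offset and length.
+	 */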
+	for (i = 0; i < pdu_count; i++) {
+		if (first_pdu[i].status == ISCSI_PDU_RECEIVED_OK) {
+			if (!send_recovery_r2t)
+				continue;
+
+			if (iscsit_send_recovery_r2t(cmd, offset, length) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+
+			send_recovery_r2t = length = offset = 0;
+			continue;
+		}
+		/*
+		 * Set recovery = 1 for any missing, CRC failed, or timed
+		 * out PDUs to let the DataOUT logic know that this sequence
+		 * has not been completed yet.
+		 *
+		 * Also, only send a Recovery R2T for ISCSI_PDU_NOT_RECEIVED.
+		 * We assume if the PDU either failed CRC or timed out
+		 * that a Recovery R2T has already been sent.
+		 */
+		recovery = 1;
+
+		if (first_pdu[i].status != ISCSI_PDU_NOT_RECEIVED)
+			continue;
+
+		if (!offset)
+			offset = first_pdu[i].offset;
+		length += first_pdu[i].length;
+
+		send_recovery_r2t = 1;
+	}
+
+	if (send_recovery_r2t)
+		if (iscsit_send_recovery_r2t(cmd, offset, length) < 0)
+			return DATAOUT_CANNOT_RECOVER;
+
+	return (!recovery) ? DATAOUT_NORMAL : DATAOUT_WITHIN_COMMAND_RECOVERY;
+}
+
+static int iscsit_recalculate_dataout_values(
+	struct iscsi_cmd *cmd,
+	u32 pdu_offset,
+	u32 pdu_length,
+	u32 *r2t_offset,
+	u32 *r2t_length)
+{
+	int i;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_pdu *pdu = NULL;
+
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		cmd->data_sn = 0;
+
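+		/*
+		 * With DataPDUInOrder=Yes the recovery R2T simply re-requests
+		 * everything from write_data_done to the end of the current
+		 * sequence.  With DataPDUInOrder=No, PDUs already received
+		 * inside the sequence window are invalidated below so the
+		 * whole sequence is retried.
+		 */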
+		if (conn->sess->sess_ops->DataPDUInOrder) {
+			*r2t_offset = cmd->write_data_done;
+			*r2t_length = (cmd->seq_end_offset -
+					cmd->write_data_done);
+			return 0;
+		}
+
+		*r2t_offset = cmd->seq_start_offset;
+		*r2t_length = (cmd->seq_end_offset - cmd->seq_start_offset);
+
+		for (i = 0; i < cmd->pdu_count; i++) {
+			pdu = &cmd->pdu_list[i];
+
+			if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+				continue;
+
+			if ((pdu->offset >= cmd->seq_start_offset) &&
+			   ((pdu->offset + pdu->length) <=
+			     cmd->seq_end_offset)) {
+				if (!cmd->unsolicited_data)
+					cmd->next_burst_len -= pdu->length;
+				else
+					cmd->first_burst_len -= pdu->length;
+
+				cmd->write_data_done -= pdu->length;
+				pdu->status = ISCSI_PDU_NOT_RECEIVED;
+			}
+		}
+	} else {
+		struct iscsi_seq *seq = NULL;
+
+		seq = iscsit_get_seq_holder(cmd, pdu_offset, pdu_length);
+		if (!seq)
+			return -1;
+
+		*r2t_offset = seq->orig_offset;
+		*r2t_length = seq->xfer_len;
+
+		cmd->write_data_done -= (seq->offset - seq->orig_offset);
+		if (cmd->immediate_data)
+			cmd->first_burst_len = cmd->write_data_done;
+
+		seq->data_sn = 0;
+		seq->offset = seq->orig_offset;
+		seq->next_burst_len = 0;
+		seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+		if (conn->sess->sess_ops->DataPDUInOrder)
+			return 0;
+
+		for (i = 0; i < seq->pdu_count; i++) {
+			pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+			if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+				continue;
+
+			pdu->status = ISCSI_PDU_NOT_RECEIVED;
+		}
+	}
+
+	return 0;
+}
+
+int iscsit_recover_dataout_sequence(
+	struct iscsi_cmd *cmd,
+	u32 pdu_offset,
+	u32 pdu_length)
+{
+	u32 r2t_length = 0, r2t_offset = 0;
+
+	spin_lock_bh(&cmd->istate_lock);
+	cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY;
+	spin_unlock_bh(&cmd->istate_lock);
+
+	if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length,
+			&r2t_offset, &r2t_length) < 0)
+		return DATAOUT_CANNOT_RECOVER;
+
+	iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length);
+
+	return DATAOUT_WITHIN_COMMAND_RECOVERY;
+}
+
+static struct iscsi_ooo_cmdsn *iscsit_allocate_ooo_cmdsn(void)
+{
+	struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL;
+
+	ooo_cmdsn = kmem_cache_zalloc(lio_ooo_cache, GFP_ATOMIC);
+	if (!ooo_cmdsn) {
+		pr_err("Unable to allocate memory for"
+			" struct iscsi_ooo_cmdsn.\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&ooo_cmdsn->ooo_list);
+
+	return ooo_cmdsn;
+}
+
+/*
+ *	Called with sess->cmdsn_mutex held.
+ */
+static int iscsit_attach_ooo_cmdsn(
+	struct iscsi_session *sess,
+	struct iscsi_ooo_cmdsn *ooo_cmdsn)
+{
+	struct iscsi_ooo_cmdsn *ooo_tail, *ooo_tmp;
+	/*
+	 * We attach the struct iscsi_ooo_cmdsn entry to the out of order
+	 * list in increasing CmdSN order.
+	 * This allows iscsit_execute_ooo_cmdsns() to detect any
+	 * additional CmdSN holes while performing delayed execution.
+	 */
+	if (list_empty(&sess->sess_ooo_cmdsn_list))
+		list_add_tail(&ooo_cmdsn->ooo_list,
+				&sess->sess_ooo_cmdsn_list);
+	else {
+		ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev,
+				typeof(*ooo_tail), ooo_list);
+		/*
+		 * CmdSN is greater than the tail of the list.
+		 */
+		if (ooo_tail->cmdsn < ooo_cmdsn->cmdsn)
+			list_add_tail(&ooo_cmdsn->ooo_list,
+					&sess->sess_ooo_cmdsn_list);
+		else {
+			/*
+			 * CmdSN is either lower than the head, or somewhere
+			 * in the middle.
+			 */
+			list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
+						ooo_list) {
+				if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+					continue;
+
+				list_add(&ooo_cmdsn->ooo_list,
+					&ooo_tmp->ooo_list);
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ *	Removes a struct iscsi_ooo_cmdsn from a session's list,
+ *	called with struct iscsi_session->cmdsn_mutex held.
+ */
+void iscsit_remove_ooo_cmdsn(
+	struct iscsi_session *sess,
+	struct iscsi_ooo_cmdsn *ooo_cmdsn)
+{
+	list_del(&ooo_cmdsn->ooo_list);
+	kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+}
+
+void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *conn)
+{
+	struct iscsi_ooo_cmdsn *ooo_cmdsn;
+	struct iscsi_session *sess = conn->sess;
+
+	mutex_lock(&sess->cmdsn_mutex);
+	list_for_each_entry(ooo_cmdsn, &sess->sess_ooo_cmdsn_list, ooo_list) {
+		if (ooo_cmdsn->cid != conn->cid)
+			continue;
+
+		ooo_cmdsn->cmd = NULL;
+	}
+	mutex_unlock(&sess->cmdsn_mutex);
+}
+
+/*
+ *	Called with sess->cmdsn_mutex held.
+ */
+int iscsit_execute_ooo_cmdsns(struct iscsi_session *sess)
+{
+	int ooo_count = 0;
+	struct iscsi_cmd *cmd = NULL;
+	struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+
+	list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+				&sess->sess_ooo_cmdsn_list, ooo_list) {
+		if (ooo_cmdsn->cmdsn != sess->exp_cmd_sn)
+			continue;
+
+		if (!ooo_cmdsn->cmd) {
+			sess->exp_cmd_sn++;
+			iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+			continue;
+		}
+
+		cmd = ooo_cmdsn->cmd;
+		cmd->i_state = cmd->deferred_i_state;
+		ooo_count++;
+		sess->exp_cmd_sn++;
+		pr_debug("Executing out of order CmdSN: 0x%08x,"
+			" incremented ExpCmdSN to 0x%08x.\n",
+			cmd->cmd_sn, sess->exp_cmd_sn);
+
+		iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+
+		if (iscsit_execute_cmd(cmd, 1) < 0)
+			return -1;
+
+		continue;
+	}
+
+	return ooo_count;
+}
+
+/*
+ *	Called either:
+ *
+ *	1. With sess->cmdsn_mutex held from iscsit_execute_ooo_cmdsns()
+ *	or iscsi_check_received_cmdsn().
+ *	2. With no locks held directly from iscsi_handle_XXX_pdu() functions
+ *	for immediate commands.
+ */
+int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
+{
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+	int lr = 0;
+
+	spin_lock_bh(&cmd->istate_lock);
+	if (ooo)
+		cmd->cmd_flags &= ~ICF_OOO_CMDSN;
+
+	switch (cmd->iscsi_opcode) {
+	case ISCSI_OP_SCSI_CMD:
+		/*
+		 * Go ahead and send the CHECK_CONDITION status for
+		 * any SCSI CDB exceptions that may have occurred, and
+		 * handle the SCF_SCSI_RESERVATION_CONFLICT case here as well.
+		 */
+		if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) {
+			if (se_cmd->se_cmd_flags &
+					SCF_SCSI_RESERVATION_CONFLICT) {
+				cmd->i_state = ISTATE_SEND_STATUS;
+				spin_unlock_bh(&cmd->istate_lock);
+				iscsit_add_cmd_to_response_queue(cmd, cmd->conn,
+						cmd->i_state);
+				return 0;
+			}
+			spin_unlock_bh(&cmd->istate_lock);
+			/*
+			 * Determine if delayed TASK_ABORTED status for WRITEs
+			 * should be sent now if no unsolicited data out
+			 * payloads are expected, or if the delayed status
+			 * should be sent after unsolicited data out with
+			 * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out()
+			 */
+			if (transport_check_aborted_status(se_cmd,
+					(cmd->unsolicited_data == 0)) != 0)
+				return 0;
+			/*
+			 * Otherwise send CHECK_CONDITION and sense for
+			 * exception
+			 */
+			return transport_send_check_condition_and_sense(se_cmd,
+					se_cmd->scsi_sense_reason, 0);
+		}
+		/*
+		 * Special case for delayed CmdSN with Immediate
+		 * Data and/or Unsolicited Data Out attached.
+		 */
+		if (cmd->immediate_data) {
+			if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+				spin_unlock_bh(&cmd->istate_lock);
+				return transport_generic_handle_data(
+						&cmd->se_cmd);
+			}
+			spin_unlock_bh(&cmd->istate_lock);
+
+			if (!(cmd->cmd_flags &
+					ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
+				/*
+				 * Send the delayed TASK_ABORTED status for
+				 * WRITEs if no more unsolicited data is
+				 * expected.
+				 */
+				if (transport_check_aborted_status(se_cmd, 1)
+						!= 0)
+					return 0;
+
+				iscsit_set_dataout_sequence_values(cmd);
+				iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 0);
+			}
+			return 0;
+		}
+		/*
+		 * The default handler.
+		 */
+		spin_unlock_bh(&cmd->istate_lock);
+
+		if ((cmd->data_direction == DMA_TO_DEVICE) &&
+		    !(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
+			/*
+			 * Send the delayed TASK_ABORTED status for WRITEs if
+			 * no more unsolicited data is expected.
+			 */
+			if (transport_check_aborted_status(se_cmd, 1) != 0)
+				return 0;
+
+			iscsit_set_dataout_sequence_values(cmd);
+			spin_lock_bh(&cmd->dataout_timeout_lock);
+			iscsit_start_dataout_timer(cmd, cmd->conn);
+			spin_unlock_bh(&cmd->dataout_timeout_lock);
+		}
+		return transport_handle_cdb_direct(&cmd->se_cmd);
+
+	case ISCSI_OP_NOOP_OUT:
+	case ISCSI_OP_TEXT:
+		spin_unlock_bh(&cmd->istate_lock);
+		iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+		break;
+	case ISCSI_OP_SCSI_TMFUNC:
+		if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) {
+			spin_unlock_bh(&cmd->istate_lock);
+			iscsit_add_cmd_to_response_queue(cmd, cmd->conn,
+					cmd->i_state);
+			return 0;
+		}
+		spin_unlock_bh(&cmd->istate_lock);
+
+		return transport_generic_handle_tmr(&cmd->se_cmd);
+	case ISCSI_OP_LOGOUT:
+		spin_unlock_bh(&cmd->istate_lock);
+		switch (cmd->logout_reason) {
+		case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+			lr = iscsit_logout_closesession(cmd, cmd->conn);
+			break;
+		case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+			lr = iscsit_logout_closeconnection(cmd, cmd->conn);
+			break;
+		case ISCSI_LOGOUT_REASON_RECOVERY:
+			lr = iscsit_logout_removeconnforrecovery(cmd, cmd->conn);
+			break;
+		default:
+			pr_err("Unknown iSCSI Logout Request Code:"
+				" 0x%02x\n", cmd->logout_reason);
+			return -1;
+		}
+
+		return lr;
+	default:
+		spin_unlock_bh(&cmd->istate_lock);
+		pr_err("Cannot perform out of order execution for"
+		" unknown iSCSI Opcode: 0x%02x\n", cmd->iscsi_opcode);
+		return -1;
+	}
+
+	return 0;
+}
+
+void iscsit_free_all_ooo_cmdsns(struct iscsi_session *sess)
+{
+	struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+
+	mutex_lock(&sess->cmdsn_mutex);
+	list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+			&sess->sess_ooo_cmdsn_list, ooo_list) {
+
+		list_del(&ooo_cmdsn->ooo_list);
+		kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+	}
+	mutex_unlock(&sess->cmdsn_mutex);
+}
+
+int iscsit_handle_ooo_cmdsn(
+	struct iscsi_session *sess,
+	struct iscsi_cmd *cmd,
+	u32 cmdsn)
+{
+	int batch = 0;
+	struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL, *ooo_tail = NULL;
+
+	cmd->deferred_i_state		= cmd->i_state;
+	cmd->i_state			= ISTATE_DEFERRED_CMD;
+	cmd->cmd_flags			|= ICF_OOO_CMDSN;
+
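+	/*
+	 * A new batch starts when the out of order list is empty, or when
+	 * this CmdSN is not contiguous with the CmdSN at the tail of the
+	 * list.
+	 */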
+	if (list_empty(&sess->sess_ooo_cmdsn_list))
+		batch = 1;
+	else {
+		ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev,
+				typeof(*ooo_tail), ooo_list);
+		if (ooo_tail->cmdsn != (cmdsn - 1))
+			batch = 1;
+	}
+
+	ooo_cmdsn = iscsit_allocate_ooo_cmdsn();
+	if (!ooo_cmdsn)
+		return CMDSN_ERROR_CANNOT_RECOVER;
+
+	ooo_cmdsn->cmd			= cmd;
+	ooo_cmdsn->batch_count		= (batch) ?
+					  (cmdsn - sess->exp_cmd_sn) : 1;
+	ooo_cmdsn->cid			= cmd->conn->cid;
+	ooo_cmdsn->exp_cmdsn		= sess->exp_cmd_sn;
+	ooo_cmdsn->cmdsn		= cmdsn;
+
+	if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) {
+		kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+		return CMDSN_ERROR_CANNOT_RECOVER;
+	}
+
+	return CMDSN_HIGHER_THAN_EXP;
+}
+
+static int iscsit_set_dataout_timeout_values(
+	struct iscsi_cmd *cmd,
+	u32 *offset,
+	u32 *length)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_r2t *r2t;
+
+	if (cmd->unsolicited_data) {
+		*offset = 0;
+		*length = (conn->sess->sess_ops->FirstBurstLength >
+			   cmd->data_length) ?
+			   cmd->data_length :
+			   conn->sess->sess_ops->FirstBurstLength;
+		return 0;
+	}
+
+	spin_lock_bh(&cmd->r2t_lock);
+	if (list_empty(&cmd->cmd_r2t_list)) {
+		pr_err("cmd->cmd_r2t_list is empty!\n");
+		spin_unlock_bh(&cmd->r2t_lock);
+		return -1;
+	}
+
+	list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+		if (r2t->sent_r2t && !r2t->recovery_r2t && !r2t->seq_complete) {
+			*offset = r2t->offset;
+			*length = r2t->xfer_len;
+			spin_unlock_bh(&cmd->r2t_lock);
+			return 0;
+		}
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	pr_err("Unable to locate any incomplete DataOUT"
+		" sequences for ITT: 0x%08x.\n", cmd->init_task_tag);
+
+	return -1;
+}
+
+/*
+ *	NOTE: Called from interrupt (timer) context.
+ */
+static void iscsit_handle_dataout_timeout(unsigned long data)
+{
+	u32 pdu_length = 0, pdu_offset = 0;
+	u32 r2t_length = 0, r2t_offset = 0;
+	struct iscsi_cmd *cmd = (struct iscsi_cmd *) data;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_session *sess = NULL;
+	struct iscsi_node_attrib *na;
+
+	iscsit_inc_conn_usage_count(conn);
+
+	spin_lock_bh(&cmd->dataout_timeout_lock);
+	if (cmd->dataout_timer_flags & ISCSI_TF_STOP) {
+		spin_unlock_bh(&cmd->dataout_timeout_lock);
+		iscsit_dec_conn_usage_count(conn);
+		return;
+	}
+	cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING;
+	sess = conn->sess;
+	na = iscsit_tpg_get_node_attrib(sess);
+
+	if (!sess->sess_ops->ErrorRecoveryLevel) {
+		pr_debug("Unable to recover from DataOut timeout while"
+			" in ERL=0.\n");
+		goto failure;
+	}
+
+	if (++cmd->dataout_timeout_retries == na->dataout_timeout_retries) {
+		pr_debug("Command ITT: 0x%08x exceeded max retries"
+			" for DataOUT timeout %u, closing iSCSI connection.\n",
+			cmd->init_task_tag, na->dataout_timeout_retries);
+		goto failure;
+	}
+
+	cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY;
+
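+	/*
+	 * Determine the offset/length window of the DataOUT sequence that
+	 * timed out, so a recovery R2T can re-request exactly that data.
+	 */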
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		if (conn->sess->sess_ops->DataPDUInOrder) {
+			pdu_offset = cmd->write_data_done;
+			if ((pdu_offset + (conn->sess->sess_ops->MaxBurstLength -
+			     cmd->next_burst_len)) > cmd->data_length)
+				pdu_length = (cmd->data_length -
+					cmd->write_data_done);
+			else
+				pdu_length = (conn->sess->sess_ops->MaxBurstLength -
+						cmd->next_burst_len);
+		} else {
+			pdu_offset = cmd->seq_start_offset;
+			pdu_length = (cmd->seq_end_offset -
+				cmd->seq_start_offset);
+		}
+	} else {
+		if (iscsit_set_dataout_timeout_values(cmd, &pdu_offset,
+				&pdu_length) < 0)
+			goto failure;
+	}
+
+	if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length,
+			&r2t_offset, &r2t_length) < 0)
+		goto failure;
+
+	pr_debug("Command ITT: 0x%08x timed out waiting for"
+		" completion of %sDataOUT Sequence Offset: %u, Length: %u\n",
+		cmd->init_task_tag, (cmd->unsolicited_data) ? "Unsolicited " :
+		"", r2t_offset, r2t_length);
+
+	if (iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length) < 0)
+		goto failure;
+
+	iscsit_start_dataout_timer(cmd, conn);
+	spin_unlock_bh(&cmd->dataout_timeout_lock);
+	iscsit_dec_conn_usage_count(conn);
+
+	return;
+
+failure:
+	spin_unlock_bh(&cmd->dataout_timeout_lock);
+	iscsit_cause_connection_reinstatement(conn, 0);
+	iscsit_dec_conn_usage_count(conn);
+}
+
+void iscsit_mod_dataout_timer(struct iscsi_cmd *cmd)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+	spin_lock_bh(&cmd->dataout_timeout_lock);
+	if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) {
+		spin_unlock_bh(&cmd->dataout_timeout_lock);
+		return;
+	}
+
+	mod_timer(&cmd->dataout_timer,
+		(get_jiffies_64() + na->dataout_timeout * HZ));
+	pr_debug("Updated DataOUT timer for ITT: 0x%08x",
+			cmd->init_task_tag);
+	spin_unlock_bh(&cmd->dataout_timeout_lock);
+}
+
+/*
+ *	Called with cmd->dataout_timeout_lock held.
+ */
+void iscsit_start_dataout_timer(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+	if (cmd->dataout_timer_flags & ISCSI_TF_RUNNING)
+		return;
+
+	pr_debug("Starting DataOUT timer for ITT: 0x%08x on"
+		" CID: %hu.\n", cmd->init_task_tag, conn->cid);
+
+	init_timer(&cmd->dataout_timer);
+	cmd->dataout_timer.expires = (get_jiffies_64() + na->dataout_timeout * HZ);
+	cmd->dataout_timer.data = (unsigned long)cmd;
+	cmd->dataout_timer.function = iscsit_handle_dataout_timeout;
+	cmd->dataout_timer_flags &= ~ISCSI_TF_STOP;
+	cmd->dataout_timer_flags |= ISCSI_TF_RUNNING;
+	add_timer(&cmd->dataout_timer);
+}
+
+void iscsit_stop_dataout_timer(struct iscsi_cmd *cmd)
+{
+	spin_lock_bh(&cmd->dataout_timeout_lock);
+	if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) {
+		spin_unlock_bh(&cmd->dataout_timeout_lock);
+		return;
+	}
+	cmd->dataout_timer_flags |= ISCSI_TF_STOP;
+	spin_unlock_bh(&cmd->dataout_timeout_lock);
+
+	del_timer_sync(&cmd->dataout_timer);
+
+	spin_lock_bh(&cmd->dataout_timeout_lock);
+	cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING;
+	pr_debug("Stopped DataOUT Timer for ITT: 0x%08x\n",
+			cmd->init_task_tag);
+	spin_unlock_bh(&cmd->dataout_timeout_lock);
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl1.h b/drivers/target/iscsi/iscsi_target_erl1.h
new file mode 100644
index 0000000..85e67e2
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl1.h
@@ -0,0 +1,26 @@
+#ifndef ISCSI_TARGET_ERL1_H
+#define ISCSI_TARGET_ERL1_H
+
+extern int iscsit_dump_data_payload(struct iscsi_conn *, u32, int);
+extern int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+			struct iscsi_cmd *, struct iscsi_datain_req *);
+extern int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+			struct iscsi_cmd *, struct iscsi_datain_req *);
+extern int iscsit_handle_recovery_datain_or_r2t(struct iscsi_conn *, unsigned char *,
+			u32, u32, u32, u32);
+extern int iscsit_handle_status_snack(struct iscsi_conn *, u32, u32,
+			u32, u32);
+extern int iscsit_handle_data_ack(struct iscsi_conn *, u32, u32, u32);
+extern int iscsit_dataout_datapduinorder_no_fbit(struct iscsi_cmd *, struct iscsi_pdu *);
+extern int iscsit_recover_dataout_sequence(struct iscsi_cmd *, u32, u32);
+extern void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *);
+extern void iscsit_free_all_ooo_cmdsns(struct iscsi_session *);
+extern int iscsit_execute_ooo_cmdsns(struct iscsi_session *);
+extern int iscsit_execute_cmd(struct iscsi_cmd *, int);
+extern int iscsit_handle_ooo_cmdsn(struct iscsi_session *, struct iscsi_cmd *, u32);
+extern void iscsit_remove_ooo_cmdsn(struct iscsi_session *, struct iscsi_ooo_cmdsn *);
+extern void iscsit_mod_dataout_timer(struct iscsi_cmd *);
+extern void iscsit_start_dataout_timer(struct iscsi_cmd *, struct iscsi_conn *);
+extern void iscsit_stop_dataout_timer(struct iscsi_cmd *);
+
+#endif /* ISCSI_TARGET_ERL1_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c
new file mode 100644
index 0000000..91a4d17
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl2.c
@@ -0,0 +1,474 @@
+/*******************************************************************************
+ * This file contains error recovery level two functions used by
+ * the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+/*
+ *	FIXME: Does RData SNACK apply here as well?
+ */
+void iscsit_create_conn_recovery_datain_values(
+	struct iscsi_cmd *cmd,
+	u32 exp_data_sn)
+{
+	u32 data_sn = 0;
+	struct iscsi_conn *conn = cmd->conn;
+
+	cmd->next_burst_len = 0;
+	cmd->read_data_done = 0;
+
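+	/*
+	 * Replay DataSNs up to the initiator acknowledged ExpDataSN,
+	 * rebuilding read_data_done and next_burst_len so the DataIN
+	 * transfer can resume from the correct offset after recovery.
+	 */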
+	while (exp_data_sn > data_sn) {
+		if ((cmd->next_burst_len +
+		     conn->conn_ops->MaxRecvDataSegmentLength) <
+		     conn->sess->sess_ops->MaxBurstLength) {
+			cmd->read_data_done +=
+			       conn->conn_ops->MaxRecvDataSegmentLength;
+			cmd->next_burst_len +=
+			       conn->conn_ops->MaxRecvDataSegmentLength;
+		} else {
+			cmd->read_data_done +=
+				(conn->sess->sess_ops->MaxBurstLength -
+				cmd->next_burst_len);
+			cmd->next_burst_len = 0;
+		}
+		data_sn++;
+	}
+}
+
+void iscsit_create_conn_recovery_dataout_values(
+	struct iscsi_cmd *cmd)
+{
+	u32 write_data_done = 0;
+	struct iscsi_conn *conn = cmd->conn;
+
+	cmd->data_sn = 0;
+	cmd->next_burst_len = 0;
+
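+	/*
+	 * Round write_data_done down to a MaxBurstLength boundary; anything
+	 * past the last complete burst will be re-requested during
+	 * connection recovery.
+	 */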
+	while (cmd->write_data_done > write_data_done) {
+		if ((write_data_done + conn->sess->sess_ops->MaxBurstLength) <=
+		     cmd->write_data_done)
+			write_data_done += conn->sess->sess_ops->MaxBurstLength;
+		else
+			break;
+	}
+
+	cmd->write_data_done = write_data_done;
+}
+
+static int iscsit_attach_active_connection_recovery_entry(
+	struct iscsi_session *sess,
+	struct iscsi_conn_recovery *cr)
+{
+	spin_lock(&sess->cr_a_lock);
+	list_add_tail(&cr->cr_list, &sess->cr_active_list);
+	spin_unlock(&sess->cr_a_lock);
+
+	return 0;
+}
+
+static int iscsit_attach_inactive_connection_recovery_entry(
+	struct iscsi_session *sess,
+	struct iscsi_conn_recovery *cr)
+{
+	spin_lock(&sess->cr_i_lock);
+	list_add_tail(&cr->cr_list, &sess->cr_inactive_list);
+
+	sess->conn_recovery_count++;
+	pr_debug("Incremented connection recovery count to %u for"
+		" SID: %u\n", sess->conn_recovery_count, sess->sid);
+	spin_unlock(&sess->cr_i_lock);
+
+	return 0;
+}
+
+struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry(
+	struct iscsi_session *sess,
+	u16 cid)
+{
+	struct iscsi_conn_recovery *cr;
+
+	spin_lock(&sess->cr_i_lock);
+	list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+		if (cr->cid == cid) {
+			spin_unlock(&sess->cr_i_lock);
+			return cr;
+		}
+	}
+	spin_unlock(&sess->cr_i_lock);
+
+	return NULL;
+}
+
+void iscsit_free_connection_recovery_entires(struct iscsi_session *sess)
+{
+	struct iscsi_cmd *cmd, *cmd_tmp;
+	struct iscsi_conn_recovery *cr, *cr_tmp;
+
+	spin_lock(&sess->cr_a_lock);
+	list_for_each_entry_safe(cr, cr_tmp, &sess->cr_active_list, cr_list) {
+		list_del(&cr->cr_list);
+		spin_unlock(&sess->cr_a_lock);
+
+		spin_lock(&cr->conn_recovery_cmd_lock);
+		list_for_each_entry_safe(cmd, cmd_tmp,
+				&cr->conn_recovery_cmd_list, i_list) {
+
+			list_del(&cmd->i_list);
+			cmd->conn = NULL;
+			spin_unlock(&cr->conn_recovery_cmd_lock);
+			if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+			    !(cmd->se_cmd.transport_wait_for_tasks))
+				iscsit_release_cmd(cmd);
+			else
+				cmd->se_cmd.transport_wait_for_tasks(
+						&cmd->se_cmd, 1, 1);
+			spin_lock(&cr->conn_recovery_cmd_lock);
+		}
+		spin_unlock(&cr->conn_recovery_cmd_lock);
+		spin_lock(&sess->cr_a_lock);
+
+		kfree(cr);
+	}
+	spin_unlock(&sess->cr_a_lock);
+
+	spin_lock(&sess->cr_i_lock);
+	list_for_each_entry_safe(cr, cr_tmp, &sess->cr_inactive_list, cr_list) {
+		list_del(&cr->cr_list);
+		spin_unlock(&sess->cr_i_lock);
+
+		spin_lock(&cr->conn_recovery_cmd_lock);
+		list_for_each_entry_safe(cmd, cmd_tmp,
+				&cr->conn_recovery_cmd_list, i_list) {
+
+			list_del(&cmd->i_list);
+			cmd->conn = NULL;
+			spin_unlock(&cr->conn_recovery_cmd_lock);
+			if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+			    !(cmd->se_cmd.transport_wait_for_tasks))
+				iscsit_release_cmd(cmd);
+			else
+				cmd->se_cmd.transport_wait_for_tasks(
+						&cmd->se_cmd, 1, 1);
+			spin_lock(&cr->conn_recovery_cmd_lock);
+		}
+		spin_unlock(&cr->conn_recovery_cmd_lock);
+		spin_lock(&sess->cr_i_lock);
+
+		kfree(cr);
+	}
+	spin_unlock(&sess->cr_i_lock);
+}
+
+int iscsit_remove_active_connection_recovery_entry(
+	struct iscsi_conn_recovery *cr,
+	struct iscsi_session *sess)
+{
+	spin_lock(&sess->cr_a_lock);
+	list_del(&cr->cr_list);
+
+	sess->conn_recovery_count--;
+	pr_debug("Decremented connection recovery count to %u for"
+		" SID: %u\n", sess->conn_recovery_count, sess->sid);
+	spin_unlock(&sess->cr_a_lock);
+
+	kfree(cr);
+
+	return 0;
+}
+
+int iscsit_remove_inactive_connection_recovery_entry(
+	struct iscsi_conn_recovery *cr,
+	struct iscsi_session *sess)
+{
+	spin_lock(&sess->cr_i_lock);
+	list_del(&cr->cr_list);
+	spin_unlock(&sess->cr_i_lock);
+
+	return 0;
+}
+
+/*
+ *	Called with cr->conn_recovery_cmd_lock held.
+ */
+int iscsit_remove_cmd_from_connection_recovery(
+	struct iscsi_cmd *cmd,
+	struct iscsi_session *sess)
+{
+	struct iscsi_conn_recovery *cr;
+
+	if (!cmd->cr) {
+		pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+			" is NULL!\n", cmd->init_task_tag);
+		BUG();
+	}
+	cr = cmd->cr;
+
+	list_del(&cmd->i_list);
+	return --cr->cmd_count;
+}
+
+void iscsit_discard_cr_cmds_by_expstatsn(
+	struct iscsi_conn_recovery *cr,
+	u32 exp_statsn)
+{
+	u32 dropped_count = 0;
+	struct iscsi_cmd *cmd, *cmd_tmp;
+	struct iscsi_session *sess = cr->sess;
+
+	spin_lock(&cr->conn_recovery_cmd_lock);
+	list_for_each_entry_safe(cmd, cmd_tmp,
+			&cr->conn_recovery_cmd_list, i_list) {
+
+		if (((cmd->deferred_i_state != ISTATE_SENT_STATUS) &&
+		     (cmd->deferred_i_state != ISTATE_REMOVE)) ||
+		     (cmd->stat_sn >= exp_statsn)) {
+			continue;
+		}
+
+		dropped_count++;
+		pr_debug("Dropping Acknowledged ITT: 0x%08x, StatSN:"
+			" 0x%08x, CID: %hu.\n", cmd->init_task_tag,
+				cmd->stat_sn, cr->cid);
+
+		iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+
+		spin_unlock(&cr->conn_recovery_cmd_lock);
+		if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+		    !(cmd->se_cmd.transport_wait_for_tasks))
+			iscsit_release_cmd(cmd);
+		else
+			cmd->se_cmd.transport_wait_for_tasks(
+					&cmd->se_cmd, 1, 0);
+		spin_lock(&cr->conn_recovery_cmd_lock);
+	}
+	spin_unlock(&cr->conn_recovery_cmd_lock);
+
+	pr_debug("Dropped %u total acknowledged commands on"
+		" CID: %hu less than old ExpStatSN: 0x%08x\n",
+			dropped_count, cr->cid, exp_statsn);
+
+	if (!cr->cmd_count) {
+		pr_debug("No commands to be reassigned for failed"
+			" connection CID: %hu on SID: %u\n",
+			cr->cid, sess->sid);
+		iscsit_remove_inactive_connection_recovery_entry(cr, sess);
+		iscsit_attach_active_connection_recovery_entry(sess, cr);
+		pr_debug("iSCSI connection recovery successful for CID:"
+			" %hu on SID: %u\n", cr->cid, sess->sid);
+		iscsit_remove_active_connection_recovery_entry(cr, sess);
+	} else {
+		iscsit_remove_inactive_connection_recovery_entry(cr, sess);
+		iscsit_attach_active_connection_recovery_entry(sess, cr);
+	}
+}
+
+int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn)
+{
+	u32 dropped_count = 0;
+	struct iscsi_cmd *cmd, *cmd_tmp;
+	struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+	struct iscsi_session *sess = conn->sess;
+
+	mutex_lock(&sess->cmdsn_mutex);
+	list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+			&sess->sess_ooo_cmdsn_list, ooo_list) {
+
+		if (ooo_cmdsn->cid != conn->cid)
+			continue;
+
+		dropped_count++;
+		pr_debug("Dropping unacknowledged CmdSN:"
+		" 0x%08x during connection recovery on CID: %hu\n",
+			ooo_cmdsn->cmdsn, conn->cid);
+		iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+	}
+	mutex_unlock(&sess->cmdsn_mutex);
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+		if (!(cmd->cmd_flags & ICF_OOO_CMDSN))
+			continue;
+
+		list_del(&cmd->i_list);
+
+		spin_unlock_bh(&conn->cmd_lock);
+		if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+		    !(cmd->se_cmd.transport_wait_for_tasks))
+			iscsit_release_cmd(cmd);
+		else
+			cmd->se_cmd.transport_wait_for_tasks(
+					&cmd->se_cmd, 1, 1);
+		spin_lock_bh(&conn->cmd_lock);
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+
+	pr_debug("Dropped %u total unacknowledged commands on CID:"
+		" %hu for ExpCmdSN: 0x%08x.\n", dropped_count, conn->cid,
+				sess->exp_cmd_sn);
+	return 0;
+}
+
+int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn)
+{
+	u32 cmd_count = 0;
+	struct iscsi_cmd *cmd, *cmd_tmp;
+	struct iscsi_conn_recovery *cr;
+
+	/*
+	 * Allocate a struct iscsi_conn_recovery for this connection.
+	 * Each struct iscsi_cmd contains a struct iscsi_conn_recovery pointer
+	 * (struct iscsi_cmd->cr) so we need to allocate this before preparing the
+	 * connection's command list for connection recovery.
+	 */
+	cr = kzalloc(sizeof(struct iscsi_conn_recovery), GFP_KERNEL);
+	if (!cr) {
+		pr_err("Unable to allocate memory for"
+			" struct iscsi_conn_recovery.\n");
+		return -1;
+	}
+	INIT_LIST_HEAD(&cr->cr_list);
+	INIT_LIST_HEAD(&cr->conn_recovery_cmd_list);
+	spin_lock_init(&cr->conn_recovery_cmd_lock);
+	/*
+	 * Only perform connection recovery on ISCSI_OP_SCSI_CMD or
+	 * ISCSI_OP_NOOP_OUT opcodes.  For all other opcodes call
+	 * list_del(&cmd->i_list); to release the command to the
+	 * session pool and remove it from the connection's list.
+	 *
+	 * Also stop the DataOUT timer, which will be restarted after
+	 * sending the TMR response.
+	 */
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+
+		if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) &&
+		    (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) {
+			pr_debug("Not performing realligence on"
+				" Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x,"
+				" CID: %hu\n", cmd->iscsi_opcode,
+				cmd->init_task_tag, cmd->cmd_sn, conn->cid);
+
+			list_del(&cmd->i_list);
+			spin_unlock_bh(&conn->cmd_lock);
+
+			if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+			    !(cmd->se_cmd.transport_wait_for_tasks))
+				iscsit_release_cmd(cmd);
+			else
+				cmd->se_cmd.transport_wait_for_tasks(
+						&cmd->se_cmd, 1, 0);
+			spin_lock_bh(&conn->cmd_lock);
+			continue;
+		}
+
+		/*
+		 * Special case where commands greater than or equal to
+		 * the session's ExpCmdSN are attached to the connection
+		 * list but not to the out of order CmdSN list.  The one
+		 * obvious case is when a command with immediate data
+		 * attached must only check the CmdSN against ExpCmdSN
+		 * after the data is received.  The special case below
+		 * is when the connection fails before data is received,
+		 * but also may apply to other PDUs, so it has been
+		 * made generic here.
+		 */
+		if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd &&
+		     (cmd->cmd_sn >= conn->sess->exp_cmd_sn)) {
+			list_del(&cmd->i_list);
+			spin_unlock_bh(&conn->cmd_lock);
+
+			if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+			    !(cmd->se_cmd.transport_wait_for_tasks))
+				iscsit_release_cmd(cmd);
+			else
+				cmd->se_cmd.transport_wait_for_tasks(
+						&cmd->se_cmd, 1, 1);
+			spin_lock_bh(&conn->cmd_lock);
+			continue;
+		}
+
+		cmd_count++;
+		pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x,"
+			" CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for"
+			" realligence.\n", cmd->iscsi_opcode,
+			cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn,
+			conn->cid);
+
+		cmd->deferred_i_state = cmd->i_state;
+		cmd->i_state = ISTATE_IN_CONNECTION_RECOVERY;
+
+		if (cmd->data_direction == DMA_TO_DEVICE)
+			iscsit_stop_dataout_timer(cmd);
+
+		cmd->sess = conn->sess;
+
+		list_del(&cmd->i_list);
+		spin_unlock_bh(&conn->cmd_lock);
+
+		iscsit_free_all_datain_reqs(cmd);
+
+		if ((cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+		     cmd->se_cmd.transport_wait_for_tasks)
+			cmd->se_cmd.transport_wait_for_tasks(&cmd->se_cmd,
+					0, 0);
+		/*
+		 * Add the struct iscsi_cmd to the connection recovery cmd list
+		 */
+		spin_lock(&cr->conn_recovery_cmd_lock);
+		list_add_tail(&cmd->i_list, &cr->conn_recovery_cmd_list);
+		spin_unlock(&cr->conn_recovery_cmd_lock);
+
+		spin_lock_bh(&conn->cmd_lock);
+		cmd->cr = cr;
+		cmd->conn = NULL;
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+	/*
+	 * Fill in the various values in the preallocated struct iscsi_conn_recovery.
+	 */
+	cr->cid = conn->cid;
+	cr->cmd_count = cmd_count;
+	cr->maxrecvdatasegmentlength = conn->conn_ops->MaxRecvDataSegmentLength;
+	cr->sess = conn->sess;
+
+	iscsit_attach_inactive_connection_recovery_entry(conn->sess, cr);
+
+	return 0;
+}
+
+int iscsit_connection_recovery_transport_reset(struct iscsi_conn *conn)
+{
+	atomic_set(&conn->connection_recovery, 1);
+
+	if (iscsit_close_connection(conn) < 0)
+		return -1;
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h
new file mode 100644
index 0000000..22f8d24
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl2.h
@@ -0,0 +1,18 @@
+#ifndef ISCSI_TARGET_ERL2_H
+#define ISCSI_TARGET_ERL2_H
+
+extern void iscsit_create_conn_recovery_datain_values(struct iscsi_cmd *, u32);
+extern void iscsit_create_conn_recovery_dataout_values(struct iscsi_cmd *);
+extern struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry(
+			struct iscsi_session *, u16);
+extern void iscsit_free_connection_recovery_entires(struct iscsi_session *);
+extern int iscsit_remove_active_connection_recovery_entry(
+			struct iscsi_conn_recovery *, struct iscsi_session *);
+extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *,
+			struct iscsi_session *);
+extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32);
+extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *);
+extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *);
+extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_ERL2_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
new file mode 100644
index 0000000..bcaf82f
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -0,0 +1,1232 @@
+/*******************************************************************************
+ * This file contains the login functions used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_stat.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_parameters.h"
+
+extern struct idr sess_idr;
+extern struct mutex auth_id_lock;
+extern spinlock_t sess_idr_lock;
+
+static int iscsi_login_init_conn(struct iscsi_conn *conn)
+{
+	INIT_LIST_HEAD(&conn->conn_list);
+	INIT_LIST_HEAD(&conn->conn_cmd_list);
+	INIT_LIST_HEAD(&conn->immed_queue_list);
+	INIT_LIST_HEAD(&conn->response_queue_list);
+	init_completion(&conn->conn_post_wait_comp);
+	init_completion(&conn->conn_wait_comp);
+	init_completion(&conn->conn_wait_rcfr_comp);
+	init_completion(&conn->conn_waiting_on_uc_comp);
+	init_completion(&conn->conn_logout_comp);
+	init_completion(&conn->rx_half_close_comp);
+	init_completion(&conn->tx_half_close_comp);
+	spin_lock_init(&conn->cmd_lock);
+	spin_lock_init(&conn->conn_usage_lock);
+	spin_lock_init(&conn->immed_queue_lock);
+	spin_lock_init(&conn->nopin_timer_lock);
+	spin_lock_init(&conn->response_queue_lock);
+	spin_lock_init(&conn->state_lock);
+
+	if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) {
+		pr_err("Unable to allocate conn->conn_cpumask\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup
+ * per struct iscsi_conn libcrypto contexts for crc32c and crc32-intel
+ */
+int iscsi_login_setup_crypto(struct iscsi_conn *conn)
+{
+	/*
+	 * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts
+	 * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fall
+	 * back to software 1x8 byte slicing from crc32c.ko
+	 */
+	conn->conn_rx_hash.flags = 0;
+	conn->conn_rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+						CRYPTO_ALG_ASYNC);
+	if (IS_ERR(conn->conn_rx_hash.tfm)) {
+		pr_err("crypto_alloc_hash() failed for conn_rx_tfm\n");
+		return -ENOMEM;
+	}
+
+	conn->conn_tx_hash.flags = 0;
+	conn->conn_tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+						CRYPTO_ALG_ASYNC);
+	if (IS_ERR(conn->conn_tx_hash.tfm)) {
+		pr_err("crypto_alloc_hash() failed for conn_tx_tfm\n");
+		crypto_free_hash(conn->conn_rx_hash.tfm);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int iscsi_login_check_initiator_version(
+	struct iscsi_conn *conn,
+	u8 version_max,
+	u8 version_min)
+{
+	if ((version_max != 0x00) || (version_min != 0x00)) {
+		pr_err("Unsupported iSCSI IETF Pre-RFC Revision,"
+			" version Min/Max 0x%02x/0x%02x, rejecting login.\n",
+			version_min, version_max);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_NO_VERSION);
+		return -1;
+	}
+
+	return 0;
+}
+
+int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn)
+{
+	int sessiontype;
+	struct iscsi_param *initiatorname_param = NULL, *sessiontype_param = NULL;
+	struct iscsi_portal_group *tpg = conn->tpg;
+	struct iscsi_session *sess = NULL, *sess_p = NULL;
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+	struct se_session *se_sess, *se_sess_tmp;
+
+	initiatorname_param = iscsi_find_param_from_key(
+			INITIATORNAME, conn->param_list);
+	if (!initiatorname_param)
+		return -1;
+
+	sessiontype_param = iscsi_find_param_from_key(
+			SESSIONTYPE, conn->param_list);
+	if (!sessiontype_param)
+		return -1;
+
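+	/* sessiontype is 1 for Discovery and 0 for Normal sessions. */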
+	sessiontype = (strncmp(sessiontype_param->value, NORMAL, 6)) ? 1 : 0;
+
+	spin_lock_bh(&se_tpg->session_lock);
+	list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+			sess_list) {
+
+		sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		spin_lock(&sess_p->conn_lock);
+		if (atomic_read(&sess_p->session_fall_back_to_erl0) ||
+		    atomic_read(&sess_p->session_logout) ||
+		    (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+			spin_unlock(&sess_p->conn_lock);
+			continue;
+		}
+		if (!memcmp((void *)sess_p->isid, (void *)conn->sess->isid, 6) &&
+		   (!strcmp((void *)sess_p->sess_ops->InitiatorName,
+			    (void *)initiatorname_param->value) &&
+		   (sess_p->sess_ops->SessionType == sessiontype))) {
+			atomic_set(&sess_p->session_reinstatement, 1);
+			spin_unlock(&sess_p->conn_lock);
+			iscsit_inc_session_usage_count(sess_p);
+			iscsit_stop_time2retain_timer(sess_p);
+			sess = sess_p;
+			break;
+		}
+		spin_unlock(&sess_p->conn_lock);
+	}
+	spin_unlock_bh(&se_tpg->session_lock);
+	/*
+	 * If the Time2Retain handler has expired, the session is already gone.
+	 */
+	if (!sess)
+		return 0;
+
+	pr_debug("%s iSCSI Session SID %u is still active for %s,"
+		" performing session reinstatement.\n", (sessiontype) ?
+		"Discovery" : "Normal", sess->sid,
+		sess->sess_ops->InitiatorName);
+
+	spin_lock_bh(&sess->conn_lock);
+	if (sess->session_state == TARG_SESS_STATE_FAILED) {
+		spin_unlock_bh(&sess->conn_lock);
+		iscsit_dec_session_usage_count(sess);
+		return iscsit_close_session(sess);
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	iscsit_stop_session(sess, 1, 1);
+	iscsit_dec_session_usage_count(sess);
+
+	return iscsit_close_session(sess);
+}
+
+static void iscsi_login_set_conn_values(
+	struct iscsi_session *sess,
+	struct iscsi_conn *conn,
+	u16 cid)
+{
+	conn->sess		= sess;
+	conn->cid		= cid;
+	/*
+	 * Generate a random Status sequence number (statsn) for the new
+	 * iSCSI connection.
+	 */
+	get_random_bytes(&conn->stat_sn, sizeof(u32));
+
+	mutex_lock(&auth_id_lock);
+	conn->auth_id		= iscsit_global->auth_id++;
+	mutex_unlock(&auth_id_lock);
+}
+
+/*
+ *	This is the leading connection of a new session,
+ *	or session reinstatement.
+ */
+static int iscsi_login_zero_tsih_s1(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	struct iscsi_session *sess = NULL;
+	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+	sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL);
+	if (!sess) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		pr_err("Could not allocate memory for session\n");
+		return -1;
+	}
+
+	iscsi_login_set_conn_values(sess, conn, pdu->cid);
+	sess->init_task_tag	= pdu->itt;
+	memcpy((void *)&sess->isid, (void *)pdu->isid, 6);
+	sess->exp_cmd_sn	= pdu->cmdsn;
+	INIT_LIST_HEAD(&sess->sess_conn_list);
+	INIT_LIST_HEAD(&sess->sess_ooo_cmdsn_list);
+	INIT_LIST_HEAD(&sess->cr_active_list);
+	INIT_LIST_HEAD(&sess->cr_inactive_list);
+	init_completion(&sess->async_msg_comp);
+	init_completion(&sess->reinstatement_comp);
+	init_completion(&sess->session_wait_comp);
+	init_completion(&sess->session_waiting_on_uc_comp);
+	mutex_init(&sess->cmdsn_mutex);
+	spin_lock_init(&sess->conn_lock);
+	spin_lock_init(&sess->cr_a_lock);
+	spin_lock_init(&sess->cr_i_lock);
+	spin_lock_init(&sess->session_usage_lock);
+	spin_lock_init(&sess->ttt_lock);
+
+	if (!idr_pre_get(&sess_idr, GFP_KERNEL)) {
+		pr_err("idr_pre_get() for sess_idr failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	spin_lock(&sess_idr_lock);
+	idr_get_new(&sess_idr, NULL, &sess->session_index);
+	spin_unlock(&sess_idr_lock);
+
+	sess->creation_time = get_jiffies_64();
+	spin_lock_init(&sess->session_stats_lock);
+	/*
+	 * The FFP CmdSN window values will be allocated from the TPG's
+	 * Initiator Node's ACL once the login has been successfully completed.
+	 */
+	sess->max_cmd_sn	= pdu->cmdsn;
+
+	sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL);
+	if (!sess->sess_ops) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_sess_ops.\n");
+		return -1;
+	}
+
+	sess->se_sess = transport_init_session();
+	if (!sess->se_sess) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int iscsi_login_zero_tsih_s2(
+	struct iscsi_conn *conn)
+{
+	struct iscsi_node_attrib *na;
+	struct iscsi_session *sess = conn->sess;
+	unsigned char buf[32];
+
+	sess->tpg = conn->tpg;
+
+	/*
+	 * Assign a new TPG Session Handle.  Note this is protected with
+	 * struct iscsi_portal_group->np_login_sem from iscsit_access_np().
+	 */
+	sess->tsih = ++ISCSI_TPG_S(sess)->ntsih;
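+	/*
+	 * A TSIH of zero is reserved for a leading connection login
+	 * request, so skip it if the counter wraps.
+	 */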
+	if (!sess->tsih)
+		sess->tsih = ++ISCSI_TPG_S(sess)->ntsih;
+
+	/*
+	 * Create the default parameters from the user-defined values.
+	 */
+	if (iscsi_copy_param_list(&conn->param_list,
+				ISCSI_TPG_C(conn)->param_list, 1) < 0) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+
+	iscsi_set_keys_to_negotiate(0, conn->param_list);
+
+	if (sess->sess_ops->SessionType)
+		return iscsi_set_keys_irrelevant_for_discovery(
+				conn->param_list);
+
+	na = iscsit_tpg_get_node_attrib(sess);
+
+	/*
+	 * Need to send TargetPortalGroupTag back in first login response
+	 * on any iSCSI connection where the Initiator provides TargetName.
+	 * See 5.3.1.  Login Phase Start
+	 *
+	 * In our case, we have already located the struct iscsi_tiqn at this point.
+	 */
+	memset(buf, 0, 32);
+	sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt);
+	if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+
+	/*
+	 * Workaround for Initiators that have broken connection recovery logic.
+	 *
+	 * "We would really like to get rid of this." Linux-iSCSI.org team
+	 */
+	memset(buf, 0, 32);
+	sprintf(buf, "ErrorRecoveryLevel=%d", na->default_erl);
+	if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+
+	if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Remove PSTATE_NEGOTIATE for the four FIM related keys.
+ * The Initiator node will be able to enable FIM by proposing them itself.
+ */
+int iscsi_login_disable_FIM_keys(
+	struct iscsi_param_list *param_list,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_param *param;
+
+	param = iscsi_find_param_from_key("OFMarker", param_list);
+	if (!param) {
+		pr_err("iscsi_find_param_from_key() for"
+				" OFMarker failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	param = iscsi_find_param_from_key("OFMarkInt", param_list);
+	if (!param) {
+		pr_err("iscsi_find_param_from_key() for"
+				" OFMarkInt failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	param = iscsi_find_param_from_key("IFMarker", param_list);
+	if (!param) {
+		pr_err("iscsi_find_param_from_key() for"
+				" IFMarker failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	param = iscsi_find_param_from_key("IFMarkInt", param_list);
+	if (!param) {
+		pr_err("iscsi_find_param_from_key() for"
+				" IFMarkInt failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	return 0;
+}
+
+static int iscsi_login_non_zero_tsih_s1(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+	iscsi_login_set_conn_values(NULL, conn, pdu->cid);
+	return 0;
+}
+
+/*
+ *	Add a new connection to an existing session.
+ */
+static int iscsi_login_non_zero_tsih_s2(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	struct iscsi_portal_group *tpg = conn->tpg;
+	struct iscsi_session *sess = NULL, *sess_p = NULL;
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+	struct se_session *se_sess, *se_sess_tmp;
+	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+	spin_lock_bh(&se_tpg->session_lock);
+	list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+			sess_list) {
+
+		sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (atomic_read(&sess_p->session_fall_back_to_erl0) ||
+		    atomic_read(&sess_p->session_logout) ||
+		   (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED))
+			continue;
+		if (!memcmp((const void *)sess_p->isid,
+		     (const void *)pdu->isid, 6) &&
+		     (sess_p->tsih == pdu->tsih)) {
+			iscsit_inc_session_usage_count(sess_p);
+			iscsit_stop_time2retain_timer(sess_p);
+			sess = sess_p;
+			break;
+		}
+	}
+	spin_unlock_bh(&se_tpg->session_lock);
+
+	/*
+	 * If the Time2Retain handler has expired, the session is already gone.
+	 */
+	if (!sess) {
+		pr_err("Initiator attempting to add a connection to"
+			" a non-existent session, rejecting iSCSI Login.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_NO_SESSION);
+		return -1;
+	}
+
+	/*
+	 * Stop the Time2Retain timer if this is a failed session; we restart
+	 * the timer if the login is not successful.
+	 */
+	spin_lock_bh(&sess->conn_lock);
+	if (sess->session_state == TARG_SESS_STATE_FAILED)
+		atomic_set(&sess->session_continuation, 1);
+	spin_unlock_bh(&sess->conn_lock);
+
+	iscsi_login_set_conn_values(sess, conn, pdu->cid);
+
+	if (iscsi_copy_param_list(&conn->param_list,
+			ISCSI_TPG_C(conn)->param_list, 0) < 0) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+
+	iscsi_set_keys_to_negotiate(0, conn->param_list);
+	/*
+	 * Need to send TargetPortalGroupTag back in first login response
+	 * on any iSCSI connection where the Initiator provides TargetName.
+	 * See 5.3.1.  Login Phase Start
+	 *
+	 * In our case, we have already located the struct iscsi_tiqn at this point.
+	 */
+	memset(buf, 0, 32);
+	sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt);
+	if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+
+	return iscsi_login_disable_FIM_keys(conn->param_list, conn);
+}
+
+int iscsi_login_post_auth_non_zero_tsih(
+	struct iscsi_conn *conn,
+	u16 cid,
+	u32 exp_statsn)
+{
+	struct iscsi_conn *conn_ptr = NULL;
+	struct iscsi_conn_recovery *cr = NULL;
+	struct iscsi_session *sess = conn->sess;
+
+	/*
+	 * By following item 5 in the login table, if we have found
+	 * an existing ISID and a valid/existing TSIH and an existing
+	 * CID, we do connection reinstatement.  Currently we do not
+	 * support it, so we send back a non-zero status class to the
+	 * initiator and release the new connection.
+	 */
+	conn_ptr = iscsit_get_conn_from_cid_rcfr(sess, cid);
+	if ((conn_ptr)) {
+		pr_err("Connection exists with CID %hu for %s,"
+			" performing connection reinstatement.\n",
+			conn_ptr->cid, sess->sess_ops->InitiatorName);
+
+		iscsit_connection_reinstatement_rcfr(conn_ptr);
+		iscsit_dec_conn_usage_count(conn_ptr);
+	}
+
+	/*
+	 * Check for any connection recovery entries containing CID.
+	 * We use the original ExpStatSN sent in the first login request
+	 * to acknowledge commands for the failed connection.
+	 *
+	 * Also note that an explicit logout may have already been sent,
+	 * but the response may not be sent due to additional connection
+	 * loss.
+	 */
+	if (sess->sess_ops->ErrorRecoveryLevel == 2) {
+		cr = iscsit_get_inactive_connection_recovery_entry(
+				sess, cid);
+		if ((cr)) {
+			pr_debug("Performing implicit logout"
+				" for connection recovery on CID: %hu\n",
+					conn->cid);
+			iscsit_discard_cr_cmds_by_expstatsn(cr, exp_statsn);
+		}
+	}
+
+	/*
+	 * Otherwise we follow item 4 from the login table: we have
+	 * found an existing ISID and a valid/existing TSIH and a new
+	 * CID, so we go ahead and add a new connection to the
+	 * session.
+	 */
+	pr_debug("Adding CID %hu to existing session for %s.\n",
+			cid, sess->sess_ops->InitiatorName);
+
+	if ((atomic_read(&sess->nconn) + 1) > sess->sess_ops->MaxConnections) {
+		pr_err("Adding additional connection to this session"
+			" would exceed MaxConnections %d, login failed.\n",
+				sess->sess_ops->MaxConnections);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_ISID_ERROR);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+
+	if (!sess->sess_ops->SessionType)
+		iscsit_start_nopin_timer(conn);
+}
+
+static int iscsi_post_login_handler(
+	struct iscsi_np *np,
+	struct iscsi_conn *conn,
+	u8 zero_tsih)
+{
+	int stop_timer = 0;
+	struct iscsi_session *sess = conn->sess;
+	struct se_session *se_sess = sess->se_sess;
+	struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+	struct iscsi_thread_set *ts;
+
+	iscsit_inc_conn_usage_count(conn);
+
+	iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_SUCCESS,
+			ISCSI_LOGIN_STATUS_ACCEPT);
+
+	pr_debug("Moving to TARG_CONN_STATE_LOGGED_IN.\n");
+	conn->conn_state = TARG_CONN_STATE_LOGGED_IN;
+
+	iscsi_set_connection_parameters(conn->conn_ops, conn->param_list);
+	iscsit_set_sync_and_steering_values(conn);
+	/*
+	 * SCSI Initiator -> SCSI Target Port Mapping
+	 */
+	ts = iscsi_get_thread_set();
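+	/*
+	 * A non-zero TSIH means this connection joins an existing session;
+	 * the leading connection (zero TSIH) path further below also
+	 * registers the new session with the generic target core.
+	 */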
+	if (!zero_tsih) {
+		iscsi_set_session_parameters(sess->sess_ops,
+				conn->param_list, 0);
+		iscsi_release_param_list(conn->param_list);
+		conn->param_list = NULL;
+
+		spin_lock_bh(&sess->conn_lock);
+		atomic_set(&sess->session_continuation, 0);
+		if (sess->session_state == TARG_SESS_STATE_FAILED) {
+			pr_debug("Moving to"
+					" TARG_SESS_STATE_LOGGED_IN.\n");
+			sess->session_state = TARG_SESS_STATE_LOGGED_IN;
+			stop_timer = 1;
+		}
+
+		pr_debug("iSCSI Login successful on CID: %hu from %s to"
+			" %s:%hu,%hu\n", conn->cid, conn->login_ip, np->np_ip,
+				np->np_port, tpg->tpgt);
+
+		list_add_tail(&conn->conn_list, &sess->sess_conn_list);
+		atomic_inc(&sess->nconn);
+		pr_debug("Incremented iSCSI Connection count to %hu"
+			" from node: %s\n", atomic_read(&sess->nconn),
+			sess->sess_ops->InitiatorName);
+		spin_unlock_bh(&sess->conn_lock);
+
+		iscsi_post_login_start_timers(conn);
+		iscsi_activate_thread_set(conn, ts);
+		/*
+		 * Determine CPU mask to ensure connection's RX and TX kthreads
+		 * are scheduled on the same CPU.
+		 */
+		iscsit_thread_get_cpumask(conn);
+		conn->conn_rx_reset_cpumask = 1;
+		conn->conn_tx_reset_cpumask = 1;
+
+		iscsit_dec_conn_usage_count(conn);
+		if (stop_timer) {
+			spin_lock_bh(&se_tpg->session_lock);
+			iscsit_stop_time2retain_timer(sess);
+			spin_unlock_bh(&se_tpg->session_lock);
+		}
+		iscsit_dec_session_usage_count(sess);
+		return 0;
+	}
+
+	iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1);
+	iscsi_release_param_list(conn->param_list);
+	conn->param_list = NULL;
+
+	iscsit_determine_maxcmdsn(sess);
+
+	spin_lock_bh(&se_tpg->session_lock);
+	__transport_register_session(&sess->tpg->tpg_se_tpg,
+			se_sess->se_node_acl, se_sess, (void *)sess);
+	pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n");
+	sess->session_state = TARG_SESS_STATE_LOGGED_IN;
+
+	pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n",
+		conn->cid, conn->login_ip, np->np_ip, np->np_port, tpg->tpgt);
+
+	spin_lock_bh(&sess->conn_lock);
+	list_add_tail(&conn->conn_list, &sess->sess_conn_list);
+	atomic_inc(&sess->nconn);
+	pr_debug("Incremented iSCSI Connection count to %hu from node:"
+		" %s\n", atomic_read(&sess->nconn),
+		sess->sess_ops->InitiatorName);
+	spin_unlock_bh(&sess->conn_lock);
+
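+	/*
+	 * Assign a non-zero session ID, skipping zero if the per-TPG
+	 * counter wraps.
+	 */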
+	sess->sid = tpg->sid++;
+	if (!sess->sid)
+		sess->sid = tpg->sid++;
+	pr_debug("Established iSCSI session from node: %s\n",
+			sess->sess_ops->InitiatorName);
+
+	tpg->nsessions++;
+	if (tpg->tpg_tiqn)
+		tpg->tpg_tiqn->tiqn_nsessions++;
+
+	pr_debug("Incremented number of active iSCSI sessions to %u on"
+		" iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt);
+	spin_unlock_bh(&se_tpg->session_lock);
+
+	iscsi_post_login_start_timers(conn);
+	iscsi_activate_thread_set(conn, ts);
+	/*
+	 * Determine CPU mask to ensure connection's RX and TX kthreads
+	 * are scheduled on the same CPU.
+	 */
+	iscsit_thread_get_cpumask(conn);
+	conn->conn_rx_reset_cpumask = 1;
+	conn->conn_tx_reset_cpumask = 1;
+
+	iscsit_dec_conn_usage_count(conn);
+
+	return 0;
+}
+
+static void iscsi_handle_login_thread_timeout(unsigned long data)
+{
+	struct iscsi_np *np = (struct iscsi_np *) data;
+
+	spin_lock_bh(&np->np_thread_lock);
+	pr_err("iSCSI Login timeout on Network Portal %s:%hu\n",
+			np->np_ip, np->np_port);
+
+	if (np->np_login_timer_flags & ISCSI_TF_STOP) {
+		spin_unlock_bh(&np->np_thread_lock);
+		return;
+	}
+
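+	/*
+	 * Kick the login thread out of its blocking accept/recv call;
+	 * SIGINT is explicitly allowed in iscsi_target_login_thread().
+	 */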
+	if (np->np_thread)
+		send_sig(SIGINT, np->np_thread, 1);
+
+	np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
+	spin_unlock_bh(&np->np_thread_lock);
+}
+
+static void iscsi_start_login_thread_timer(struct iscsi_np *np)
+{
+	/*
+	 * This uses the TA_LOGIN_TIMEOUT constant because at this
+	 * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout.
+	 */
+	spin_lock_bh(&np->np_thread_lock);
+	init_timer(&np->np_login_timer);
+	np->np_login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ);
+	np->np_login_timer.data = (unsigned long)np;
+	np->np_login_timer.function = iscsi_handle_login_thread_timeout;
+	np->np_login_timer_flags &= ~ISCSI_TF_STOP;
+	np->np_login_timer_flags |= ISCSI_TF_RUNNING;
+	add_timer(&np->np_login_timer);
+
+	pr_debug("Added timeout timer to iSCSI login request for"
+			" %u seconds.\n", TA_LOGIN_TIMEOUT);
+	spin_unlock_bh(&np->np_thread_lock);
+}
+
+static void iscsi_stop_login_thread_timer(struct iscsi_np *np)
+{
+	spin_lock_bh(&np->np_thread_lock);
+	if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) {
+		spin_unlock_bh(&np->np_thread_lock);
+		return;
+	}
+	np->np_login_timer_flags |= ISCSI_TF_STOP;
+	spin_unlock_bh(&np->np_thread_lock);
+
+	del_timer_sync(&np->np_login_timer);
+
+	spin_lock_bh(&np->np_thread_lock);
+	np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
+	spin_unlock_bh(&np->np_thread_lock);
+}
+
+int iscsi_target_setup_login_socket(
+	struct iscsi_np *np,
+	struct __kernel_sockaddr_storage *sockaddr)
+{
+	struct socket *sock;
+	int backlog = 5, ret, opt = 0, len;
+
+	switch (np->np_network_transport) {
+	case ISCSI_TCP:
+		np->np_ip_proto = IPPROTO_TCP;
+		np->np_sock_type = SOCK_STREAM;
+		break;
+	case ISCSI_SCTP_TCP:
+		np->np_ip_proto = IPPROTO_SCTP;
+		np->np_sock_type = SOCK_STREAM;
+		break;
+	case ISCSI_SCTP_UDP:
+		np->np_ip_proto = IPPROTO_SCTP;
+		np->np_sock_type = SOCK_SEQPACKET;
+		break;
+	case ISCSI_IWARP_TCP:
+	case ISCSI_IWARP_SCTP:
+	case ISCSI_INFINIBAND:
+	default:
+		pr_err("Unsupported network_transport: %d\n",
+				np->np_network_transport);
+		return -EINVAL;
+	}
+
+	ret = sock_create(sockaddr->ss_family, np->np_sock_type,
+			np->np_ip_proto, &sock);
+	if (ret < 0) {
+		pr_err("sock_create() failed.\n");
+		return ret;
+	}
+	np->np_socket = sock;
+	/*
+	 * The SCTP stack needs struct socket->file.
+	 */
+	if ((np->np_network_transport == ISCSI_SCTP_TCP) ||
+	    (np->np_network_transport == ISCSI_SCTP_UDP)) {
+		if (!sock->file) {
+			sock->file = kzalloc(sizeof(struct file), GFP_KERNEL);
+			if (!sock->file) {
+				pr_err("Unable to allocate struct"
+						" file for SCTP\n");
+				ret = -ENOMEM;
+				goto fail;
+			}
+			np->np_flags |= NPF_SCTP_STRUCT_FILE;
+		}
+	}
+	/*
+	 * Set up np->np_sockaddr from the sockaddr passed in from
+	 * the iscsi_target_configfs.c code.
+	 */
+	memcpy((void *)&np->np_sockaddr, (void *)sockaddr,
+			sizeof(struct __kernel_sockaddr_storage));
+
+	if (sockaddr->ss_family == AF_INET6)
+		len = sizeof(struct sockaddr_in6);
+	else
+		len = sizeof(struct sockaddr_in);
+	/*
+	 * Set SO_REUSEADDR, and disable the Nagle algorithm with TCP_NODELAY.
+	 */
+	opt = 1;
+	if (np->np_network_transport == ISCSI_TCP) {
+		ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
+				(char *)&opt, sizeof(opt));
+		if (ret < 0) {
+			pr_err("kernel_setsockopt() for TCP_NODELAY"
+				" failed: %d\n", ret);
+			goto fail;
+		}
+	}
+
+	ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+			(char *)&opt, sizeof(opt));
+	if (ret < 0) {
+		pr_err("kernel_setsockopt() for SO_REUSEADDR"
+			" failed\n");
+		goto fail;
+	}
+
+	ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
+	if (ret < 0) {
+		pr_err("kernel_bind() failed: %d\n", ret);
+		goto fail;
+	}
+
+	ret = kernel_listen(sock, backlog);
+	if (ret != 0) {
+		pr_err("kernel_listen() failed: %d\n", ret);
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	np->np_socket = NULL;
+	if (sock) {
+		if (np->np_flags & NPF_SCTP_STRUCT_FILE) {
+			kfree(sock->file);
+			sock->file = NULL;
+		}
+
+		sock_release(sock);
+	}
+	return ret;
+}
+
+static int __iscsi_target_login_thread(struct iscsi_np *np)
+{
+	u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0;
+	int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop;
+	struct iscsi_conn *conn = NULL;
+	struct iscsi_login *login;
+	struct iscsi_portal_group *tpg = NULL;
+	struct socket *new_sock, *sock;
+	struct kvec iov;
+	struct iscsi_login_req *pdu;
+	struct sockaddr_in sock_in;
+	struct sockaddr_in6 sock_in6;
+
+	flush_signals(current);
+	set_sctp_conn_flag = 0;
+	sock = np->np_socket;
+	ip_proto = np->np_ip_proto;
+	sock_type = np->np_sock_type;
+
+	spin_lock_bh(&np->np_thread_lock);
+	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+		np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+		complete(&np->np_restart_comp);
+	} else {
+		np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+	}
+	spin_unlock_bh(&np->np_thread_lock);
+
+	if (kernel_accept(sock, &new_sock, 0) < 0) {
+		spin_lock_bh(&np->np_thread_lock);
+		if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+			spin_unlock_bh(&np->np_thread_lock);
+			complete(&np->np_restart_comp);
+			/* Get another socket */
+			return 1;
+		}
+		spin_unlock_bh(&np->np_thread_lock);
+		goto out;
+	}
+	/*
+	 * The SCTP stack needs struct socket->file.
+	 */
+	if ((np->np_network_transport == ISCSI_SCTP_TCP) ||
+	    (np->np_network_transport == ISCSI_SCTP_UDP)) {
+		if (!new_sock->file) {
+			new_sock->file = kzalloc(
+					sizeof(struct file), GFP_KERNEL);
+			if (!new_sock->file) {
+				pr_err("Unable to allocate struct"
+						" file for SCTP\n");
+				sock_release(new_sock);
+				/* Get another socket */
+				return 1;
+			}
+			set_sctp_conn_flag = 1;
+		}
+	}
+
+	iscsi_start_login_thread_timer(np);
+
+	conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL);
+	if (!conn) {
+		pr_err("Could not allocate memory for"
+			" new connection\n");
+		if (set_sctp_conn_flag) {
+			kfree(new_sock->file);
+			new_sock->file = NULL;
+		}
+		sock_release(new_sock);
+		/* Get another socket */
+		return 1;
+	}
+
+	pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
+	conn->conn_state = TARG_CONN_STATE_FREE;
+	conn->sock = new_sock;
+
+	if (set_sctp_conn_flag)
+		conn->conn_flags |= CONNFLAG_SCTP_STRUCT_FILE;
+
+	pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n");
+	conn->conn_state = TARG_CONN_STATE_XPT_UP;
+
+	/*
+	 * Allocate conn->conn_ops early, as a failure path calling
+	 * iscsit_tx_login_rsp() below will call tx_data().
+	 */
+	conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL);
+	if (!conn->conn_ops) {
+		pr_err("Unable to allocate memory for"
+			" struct iscsi_conn_ops.\n");
+		goto new_sess_out;
+	}
+	/*
+	 * Perform the remaining iSCSI connection initialization items.
+	 */
+	if (iscsi_login_init_conn(conn) < 0)
+		goto new_sess_out;
+
+	memset(buffer, 0, ISCSI_HDR_LEN);
+	memset(&iov, 0, sizeof(struct kvec));
+	iov.iov_base	= buffer;
+	iov.iov_len	= ISCSI_HDR_LEN;
+
+	if (rx_data(conn, &iov, 1, ISCSI_HDR_LEN) <= 0) {
+		pr_err("rx_data() returned an error.\n");
+		goto new_sess_out;
+	}
+
+	iscsi_opcode = (buffer[0] & ISCSI_OPCODE_MASK);
+	if (!(iscsi_opcode & ISCSI_OP_LOGIN)) {
+		pr_err("First opcode is not login request,"
+			" failing login request.\n");
+		goto new_sess_out;
+	}
+
+	pdu			= (struct iscsi_login_req *) buffer;
+	pdu->cid		= be16_to_cpu(pdu->cid);
+	pdu->tsih		= be16_to_cpu(pdu->tsih);
+	pdu->itt		= be32_to_cpu(pdu->itt);
+	pdu->cmdsn		= be32_to_cpu(pdu->cmdsn);
+	pdu->exp_statsn		= be32_to_cpu(pdu->exp_statsn);
+	/*
+	 * Used by iscsit_tx_login_rsp() for Login Response PDUs
+	 * when Status-Class != 0.
+	 */
+	conn->login_itt		= pdu->itt;
+
+	spin_lock_bh(&np->np_thread_lock);
+	if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+		spin_unlock_bh(&np->np_thread_lock);
+		pr_err("iSCSI Network Portal on %s:%hu currently not"
+			" active.\n", np->np_ip, np->np_port);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		goto new_sess_out;
+	}
+	spin_unlock_bh(&np->np_thread_lock);
+
+	if (np->np_sockaddr.ss_family == AF_INET6) {
+		memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
+
+		if (conn->sock->ops->getname(conn->sock,
+				(struct sockaddr *)&sock_in6, &err, 1) < 0) {
+			pr_err("sock_ops->getname() failed.\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+					ISCSI_LOGIN_STATUS_TARGET_ERROR);
+			goto new_sess_out;
+		}
+#if 0
+		if (!iscsi_ntop6((const unsigned char *)
+				&sock_in6.sin6_addr.in6_u,
+				(char *)&conn->ipv6_login_ip[0],
+				IPV6_ADDRESS_SPACE)) {
+			pr_err("iscsi_ntop6() failed\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+					ISCSI_LOGIN_STATUS_TARGET_ERROR);
+			goto new_sess_out;
+		}
+#else
+		pr_debug("Skipping iscsi_ntop6()\n");
+#endif
+	} else {
+		memset(&sock_in, 0, sizeof(struct sockaddr_in));
+
+		if (conn->sock->ops->getname(conn->sock,
+				(struct sockaddr *)&sock_in, &err, 1) < 0) {
+			pr_err("sock_ops->getname() failed.\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+					ISCSI_LOGIN_STATUS_TARGET_ERROR);
+			goto new_sess_out;
+		}
+		sprintf(conn->login_ip, "%pI4", &sock_in.sin_addr.s_addr);
+		conn->login_port = ntohs(sock_in.sin_port);
+	}
+
+	conn->network_transport = np->np_network_transport;
+
+	pr_debug("Received iSCSI login request from %s on %s Network"
+			" Portal %s:%hu\n", conn->login_ip,
+		(conn->network_transport == ISCSI_TCP) ? "TCP" : "SCTP",
+			np->np_ip, np->np_port);
+
+	pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n");
+	conn->conn_state	= TARG_CONN_STATE_IN_LOGIN;
+
+	if (iscsi_login_check_initiator_version(conn, pdu->max_version,
+			pdu->min_version) < 0)
+		goto new_sess_out;
+
+	zero_tsih = (pdu->tsih == 0x0000);
+	if ((zero_tsih)) {
+		/*
+		 * This is the leading connection of a new session.
+		 * We wait until after authentication to check for
+		 * session reinstatement.
+		 */
+		if (iscsi_login_zero_tsih_s1(conn, buffer) < 0)
+			goto new_sess_out;
+	} else {
+		/*
+		 * Add a new connection to an existing session.
+		 * We check for a non-existent session in
+		 * iscsi_login_non_zero_tsih_s2() below based
+		 * on ISID/TSIH, but wait until after authentication
+		 * to check for connection reinstatement, etc.
+		 */
+		if (iscsi_login_non_zero_tsih_s1(conn, buffer) < 0)
+			goto new_sess_out;
+	}
+
+	/*
+	 * This will process the first login request, and call
+	 * iscsi_target_locate_portal(), and return a valid struct iscsi_login.
+	 */
+	login = iscsi_target_init_negotiation(np, conn, buffer);
+	if (!login) {
+		tpg = conn->tpg;
+		goto new_sess_out;
+	}
+
+	tpg = conn->tpg;
+	if (!tpg) {
+		pr_err("Unable to locate struct iscsi_conn->tpg\n");
+		goto new_sess_out;
+	}
+
+	if (zero_tsih) {
+		if (iscsi_login_zero_tsih_s2(conn) < 0) {
+			iscsi_target_nego_release(login, conn);
+			goto new_sess_out;
+		}
+	} else {
+		if (iscsi_login_non_zero_tsih_s2(conn, buffer) < 0) {
+			iscsi_target_nego_release(login, conn);
+			goto old_sess_out;
+		}
+	}
+
+	if (iscsi_target_start_negotiation(login, conn) < 0)
+		goto new_sess_out;
+
+	if (!conn->sess) {
+		pr_err("struct iscsi_conn session pointer is NULL!\n");
+		goto new_sess_out;
+	}
+
+	iscsi_stop_login_thread_timer(np);
+
+	if (signal_pending(current))
+		goto new_sess_out;
+
+	ret = iscsi_post_login_handler(np, conn, zero_tsih);
+
+	if (ret < 0)
+		goto new_sess_out;
+
+	iscsit_deaccess_np(np, tpg);
+	tpg = NULL;
+	/* Get another socket */
+	return 1;
+
+new_sess_out:
+	pr_err("iSCSI Login negotiation failed.\n");
+	iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				  ISCSI_LOGIN_STATUS_INIT_ERR);
+	if (!zero_tsih || !conn->sess)
+		goto old_sess_out;
+	if (conn->sess->se_sess)
+		transport_free_session(conn->sess->se_sess);
+	if (conn->sess->session_index != 0) {
+		spin_lock_bh(&sess_idr_lock);
+		idr_remove(&sess_idr, conn->sess->session_index);
+		spin_unlock_bh(&sess_idr_lock);
+	}
+	if (conn->sess->sess_ops)
+		kfree(conn->sess->sess_ops);
+	if (conn->sess)
+		kfree(conn->sess);
+old_sess_out:
+	iscsi_stop_login_thread_timer(np);
+	/*
+	 * If login negotiation fails check if the Time2Retain timer
+	 * needs to be restarted.
+	 */
+	if (!zero_tsih && conn->sess) {
+		spin_lock_bh(&conn->sess->conn_lock);
+		if (conn->sess->session_state == TARG_SESS_STATE_FAILED) {
+			struct se_portal_group *se_tpg =
+					&ISCSI_TPG_C(conn)->tpg_se_tpg;
+
+			atomic_set(&conn->sess->session_continuation, 0);
+			spin_unlock_bh(&conn->sess->conn_lock);
+			spin_lock_bh(&se_tpg->session_lock);
+			iscsit_start_time2retain_handler(conn->sess);
+			spin_unlock_bh(&se_tpg->session_lock);
+		} else
+			spin_unlock_bh(&conn->sess->conn_lock);
+		iscsit_dec_session_usage_count(conn->sess);
+	}
+
+	if (!IS_ERR(conn->conn_rx_hash.tfm))
+		crypto_free_hash(conn->conn_rx_hash.tfm);
+	if (!IS_ERR(conn->conn_tx_hash.tfm))
+		crypto_free_hash(conn->conn_tx_hash.tfm);
+
+	if (conn->conn_cpumask)
+		free_cpumask_var(conn->conn_cpumask);
+
+	kfree(conn->conn_ops);
+
+	if (conn->param_list) {
+		iscsi_release_param_list(conn->param_list);
+		conn->param_list = NULL;
+	}
+	if (conn->sock) {
+		if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) {
+			kfree(conn->sock->file);
+			conn->sock->file = NULL;
+		}
+		sock_release(conn->sock);
+	}
+	kfree(conn);
+
+	if (tpg) {
+		iscsit_deaccess_np(np, tpg);
+		tpg = NULL;
+	}
+
+out:
+	stop = kthread_should_stop();
+	if (!stop && signal_pending(current)) {
+		spin_lock_bh(&np->np_thread_lock);
+		stop = (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN);
+		spin_unlock_bh(&np->np_thread_lock);
+	}
+	/* Wait for another socket.. */
+	if (!stop)
+		return 1;
+
+	iscsi_stop_login_thread_timer(np);
+	spin_lock_bh(&np->np_thread_lock);
+	np->np_thread_state = ISCSI_NP_THREAD_EXIT;
+	spin_unlock_bh(&np->np_thread_lock);
+	return 0;
+}
+
+int iscsi_target_login_thread(void *arg)
+{
+	struct iscsi_np *np = (struct iscsi_np *)arg;
+	int ret;
+
+	allow_signal(SIGINT);
+
+	while (!kthread_should_stop()) {
+		ret = __iscsi_target_login_thread(np);
+		/*
+		 * We break and exit here unless another sock_accept() call
+		 * is expected.
+		 */
+		if (ret != 1)
+			break;
+	}
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
new file mode 100644
index 0000000..091dcae
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_login.h
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_LOGIN_H
+#define ISCSI_TARGET_LOGIN_H
+
+extern int iscsi_login_setup_crypto(struct iscsi_conn *);
+extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *);
+extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32);
+extern int iscsi_target_setup_login_socket(struct iscsi_np *,
+				struct __kernel_sockaddr_storage *);
+extern int iscsi_target_login_thread(void *);
+extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *);
+
+#endif   /*** ISCSI_TARGET_LOGIN_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
new file mode 100644
index 0000000..713a4d2
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -0,0 +1,1067 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/ctype.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_auth.h"
+
+#define MAX_LOGIN_PDUS  7
+#define TEXT_LEN	4096
+
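+/*
+ * iSCSI login keys arrive in the payload as NUL-separated "key=value"
+ * pairs; converting the NUL delimiters to ';' lets the payload be
+ * scanned as a single C string when extracting individual keys.
+ */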
+void convert_null_to_semi(char *buf, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		if (buf[i] == '\0')
+			buf[i] = ';';
+}
+
+int strlen_semi(char *buf)
+{
+	int i = 0;
+
+	while (buf[i] != '\0') {
+		if (buf[i] == ';')
+			return i;
+		i++;
+	}
+
+	return -1;
+}
+
+int extract_param(
+	const char *in_buf,
+	const char *pattern,
+	unsigned int max_length,
+	char *out_buf,
+	unsigned char *type)
+{
+	char *ptr;
+	int len;
+
+	if (!in_buf || !pattern || !out_buf || !type)
+		return -1;
+
+	ptr = strstr(in_buf, pattern);
+	if (!ptr)
+		return -1;
+
+	ptr = strstr(ptr, "=");
+	if (!ptr)
+		return -1;
+
+	ptr += 1;
+	if (*ptr == '0' && (*(ptr+1) == 'x' || *(ptr+1) == 'X')) {
+		ptr += 2; /* skip 0x */
+		*type = HEX;
+	} else
+		*type = DECIMAL;
+
+	len = strlen_semi(ptr);
+	if (len < 0)
+		return -1;
+
+	if (len > max_length) {
+		pr_err("Length of input: %d exceeds max_length:"
+			" %d\n", len, max_length);
+		return -1;
+	}
+	memcpy(out_buf, ptr, len);
+	out_buf[len] = '\0';
+
+	return 0;
+}
+
+static u32 iscsi_handle_authentication(
+	struct iscsi_conn *conn,
+	char *in_buf,
+	char *out_buf,
+	int in_length,
+	int *out_length,
+	unsigned char *authtype)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_auth *auth;
+	struct iscsi_node_acl *iscsi_nacl;
+	struct se_node_acl *se_nacl;
+
+	if (!sess->sess_ops->SessionType) {
+		/*
+		 * For SessionType=Normal
+		 */
+		se_nacl = conn->sess->se_sess->se_node_acl;
+		if (!se_nacl) {
+			pr_err("Unable to locate struct se_node_acl for"
+					" CHAP auth\n");
+			return -1;
+		}
+		iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl,
+				se_node_acl);
+		if (!iscsi_nacl) {
+			pr_err("Unable to locate struct iscsi_node_acl for"
+					" CHAP auth\n");
+			return -1;
+		}
+
+		auth = ISCSI_NODE_AUTH(iscsi_nacl);
+	} else {
+		/*
+		 * For SessionType=Discovery
+		 */
+		auth = &iscsit_global->discovery_acl.node_auth;
+	}
+
+	if (strstr("CHAP", authtype))
+		strcpy(conn->sess->auth_type, "CHAP");
+	else
+		strcpy(conn->sess->auth_type, NONE);
+
+	if (strstr("None", authtype))
+		return 1;
+#ifdef CANSRP
+	else if (strstr("SRP", authtype))
+		return srp_main_loop(conn, auth, in_buf, out_buf,
+				&in_length, out_length);
+#endif
+	else if (strstr("CHAP", authtype))
+		return chap_main_loop(conn, auth, in_buf, out_buf,
+				&in_length, out_length);
+	else if (strstr("SPKM1", authtype))
+		return 2;
+	else if (strstr("SPKM2", authtype))
+		return 2;
+	else if (strstr("KRB5", authtype))
+		return 2;
+	else
+		return 2;
+}
+
+static void iscsi_remove_failed_auth_entry(struct iscsi_conn *conn)
+{
+	kfree(conn->auth_protocol);
+}
+
+static int iscsi_target_check_login_request(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	int req_csg, req_nsg, rsp_csg, rsp_nsg;
+	u32 payload_length;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	switch (login_req->opcode & ISCSI_OPCODE_MASK) {
+	case ISCSI_OP_LOGIN:
+		break;
+	default:
+		pr_err("Received unknown opcode 0x%02x.\n",
+				login_req->opcode & ISCSI_OPCODE_MASK);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if ((login_req->flags & ISCSI_FLAG_LOGIN_CONTINUE) &&
+	    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+		pr_err("Login request has both ISCSI_FLAG_LOGIN_CONTINUE"
+			" and ISCSI_FLAG_LOGIN_TRANSIT set, protocol error.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
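+	/*
+	 * CSG (Current Stage) occupies bits 2-3 and NSG (Next Stage)
+	 * bits 0-1 of the login flags byte; valid stages are 0 (Security
+	 * Negotiation), 1 (Login Operational Negotiation) and 3 (Full
+	 * Feature Phase).
+	 */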
+	req_csg = (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	rsp_csg = (login_rsp->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	req_nsg = (login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+	rsp_nsg = (login_rsp->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+
+	if (req_csg != login->current_stage) {
+		pr_err("Initiator unexpectedly changed login stage"
+			" from %d to %d, login failed.\n", login->current_stage,
+			req_csg);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if ((req_nsg == 2) || (req_csg >= 2) ||
+	   ((login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT) &&
+	    (req_nsg <= req_csg))) {
+		pr_err("Illegal login_req->flags Combination, CSG: %d,"
+			" NSG: %d, ISCSI_FLAG_LOGIN_TRANSIT: %d.\n", req_csg,
+			req_nsg, (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT));
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if ((login_req->max_version != login->version_max) ||
+	    (login_req->min_version != login->version_min)) {
+		pr_err("Login request changed Version Max/Min"
+			" unexpectedly to 0x%02x/0x%02x, protocol error\n",
+			login_req->max_version, login_req->min_version);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if (memcmp(login_req->isid, login->isid, 6) != 0) {
+		pr_err("Login request changed ISID unexpectedly,"
+				" protocol error.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if (login_req->itt != login->init_task_tag) {
+		pr_err("Login request changed ITT unexpectedly to"
+			" 0x%08x, protocol error.\n", login_req->itt);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if (payload_length > MAX_KEY_VALUE_PAIRS) {
+		pr_err("Login request payload exceeds default"
+			" MaxRecvDataSegmentLength: %u, protocol error.\n",
+				MAX_KEY_VALUE_PAIRS);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int iscsi_target_check_first_request(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	struct iscsi_param *param = NULL;
+	struct se_node_acl *se_nacl;
+
+	login->first_request = 0;
+
+	list_for_each_entry(param, &conn->param_list->param_list, p_list) {
+		if (!strncmp(param->name, SESSIONTYPE, 11)) {
+			if (!IS_PSTATE_ACCEPTOR(param)) {
+				pr_err("SessionType key not received"
+					" in first login request.\n");
+				iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+				return -1;
+			}
+			if (!strncmp(param->value, DISCOVERY, 9))
+				return 0;
+		}
+
+		if (!strncmp(param->name, INITIATORNAME, 13)) {
+			if (!IS_PSTATE_ACCEPTOR(param)) {
+				if (!login->leading_connection)
+					continue;
+
+				pr_err("InitiatorName key not received"
+					" in first login request.\n");
+				iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+				return -1;
+			}
+
+			/*
+			 * For non-leading connections, double check that the
+			 * received InitiatorName matches the existing session's
+			 * struct iscsi_node_acl.
+			 */
+			if (!login->leading_connection) {
+				se_nacl = conn->sess->se_sess->se_node_acl;
+				if (!se_nacl) {
+					pr_err("Unable to locate"
+						" struct se_node_acl\n");
+					iscsit_tx_login_rsp(conn,
+							ISCSI_STATUS_CLS_INITIATOR_ERR,
+							ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+					return -1;
+				}
+
+				if (strcmp(param->value,
+						se_nacl->initiatorname)) {
+					pr_err("Incorrect"
+						" InitiatorName: %s for this"
+						" iSCSI Initiator Node.\n",
+						param->value);
+					iscsit_tx_login_rsp(conn,
+							ISCSI_STATUS_CLS_INITIATOR_ERR,
+							ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	u32 padding = 0;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+	login_rsp->opcode		= ISCSI_OP_LOGIN_RSP;
+	hton24(login_rsp->dlength, login->rsp_length);
+	memcpy(login_rsp->isid, login->isid, 6);
+	login_rsp->tsih			= cpu_to_be16(login->tsih);
+	login_rsp->itt			= cpu_to_be32(login->init_task_tag);
+	login_rsp->statsn		= cpu_to_be32(conn->stat_sn++);
+	login_rsp->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	login_rsp->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x,"
+		" ExpCmdSN: 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:"
+		" %u\n", login_rsp->flags, ntohl(login_rsp->itt),
+		ntohl(login_rsp->exp_cmdsn), ntohl(login_rsp->max_cmdsn),
+		ntohl(login_rsp->statsn), login->rsp_length);
+
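+	/*
+	 * Login PDU payloads are padded to a 4-byte boundary;
+	 * (-len) & 3 yields the 0-3 pad bytes required.
+	 */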
+	padding = ((-login->rsp_length) & 3);
+
+	if (iscsi_login_tx_data(
+			conn,
+			login->rsp,
+			login->rsp_buf,
+			login->rsp_length + padding) < 0)
+		return -1;
+
+	login->rsp_length		= 0;
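+	/*
+	 * Restore the just-sent header fields to host byte order, since
+	 * subsequent login processing expects CPU-order values.
+	 */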
+	login_rsp->tsih			= be16_to_cpu(login_rsp->tsih);
+	login_rsp->itt			= be32_to_cpu(login_rsp->itt);
+	login_rsp->statsn		= be32_to_cpu(login_rsp->statsn);
+	mutex_lock(&sess->cmdsn_mutex);
+	login_rsp->exp_cmdsn		= be32_to_cpu(sess->exp_cmd_sn);
+	login_rsp->max_cmdsn		= be32_to_cpu(sess->max_cmd_sn);
+	mutex_unlock(&sess->cmdsn_mutex);
+
+	return 0;
+}
+
+static int iscsi_target_do_rx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	u32 padding = 0, payload_length;
+	struct iscsi_login_req *login_req;
+
+	if (iscsi_login_rx_data(conn, login->req, ISCSI_HDR_LEN) < 0)
+		return -1;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	payload_length			= ntoh24(login_req->dlength);
+	login_req->tsih			= be16_to_cpu(login_req->tsih);
+	login_req->itt			= be32_to_cpu(login_req->itt);
+	login_req->cid			= be16_to_cpu(login_req->cid);
+	login_req->cmdsn		= be32_to_cpu(login_req->cmdsn);
+	login_req->exp_statsn		= be32_to_cpu(login_req->exp_statsn);
+
+	pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+		" CmdSN: 0x%08x, ExpStatSN: 0x%08x, CID: %hu, Length: %u\n",
+		 login_req->flags, login_req->itt, login_req->cmdsn,
+		 login_req->exp_statsn, login_req->cid, payload_length);
+
+	if (iscsi_target_check_login_request(conn, login) < 0)
+		return -1;
+
+	padding = ((-payload_length) & 3);
+	memset(login->req_buf, 0, MAX_KEY_VALUE_PAIRS);
+
+	if (iscsi_login_rx_data(
+			conn,
+			login->req_buf,
+			payload_length + padding) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	if (iscsi_target_do_tx_login_io(conn, login) < 0)
+		return -1;
+
+	if (iscsi_target_do_rx_login_io(conn, login) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int iscsi_target_get_initial_payload(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	u32 padding = 0, payload_length;
+	struct iscsi_login_req *login_req;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	payload_length = ntoh24(login_req->dlength);
+
+	pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+		" CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+		login_req->flags, login_req->itt, login_req->cmdsn,
+		login_req->exp_statsn, payload_length);
+
+	if (iscsi_target_check_login_request(conn, login) < 0)
+		return -1;
+
+	padding = ((-payload_length) & 3);
+
+	if (iscsi_login_rx_data(
+			conn,
+			login->req_buf,
+			payload_length + padding) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ *	NOTE: We check for existing sessions or connections AFTER the initiator
+ *	has been successfully authenticated in order to protect against faked
+ *	ISID/TSIH combinations.
+ */
+static int iscsi_target_check_for_existing_instances(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	if (login->checked_for_existing)
+		return 0;
+
+	login->checked_for_existing = 1;
+
+	if (!login->tsih)
+		return iscsi_check_for_session_reinstatement(conn);
+	else
+		return iscsi_login_post_auth_non_zero_tsih(conn, login->cid,
+				login->initial_exp_statsn);
+}
+
+static int iscsi_target_do_authentication(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	int authret;
+	u32 payload_length;
+	struct iscsi_param *param;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+	if (!param)
+		return -1;
+
+	authret = iscsi_handle_authentication(
+			conn,
+			login->req_buf,
+			login->rsp_buf,
+			payload_length,
+			&login->rsp_length,
+			param->value);
+	switch (authret) {
+	case 0:
+		pr_debug("Received OK response"
+		" from LIO Authentication, continuing.\n");
+		break;
+	case 1:
+		pr_debug("iSCSI security negotiation"
+			" completed successfully.\n");
+		login->auth_complete = 1;
+		if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+		    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+			login_rsp->flags |= (ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+					     ISCSI_FLAG_LOGIN_TRANSIT);
+			login->current_stage = 1;
+		}
+		return iscsi_target_check_for_existing_instances(
+				conn, login);
+	case 2:
+		pr_err("Security negotiation"
+			" failed.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_AUTH_FAILED);
+		return -1;
+	default:
+		pr_err("Received unknown error %d from LIO"
+				" Authentication\n", authret);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_TARGET_ERROR);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int iscsi_target_handle_csg_zero(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	int ret;
+	u32 payload_length;
+	struct iscsi_param *param;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+	if (!param)
+		return -1;
+
+	ret = iscsi_decode_text_input(
+			PHASE_SECURITY|PHASE_DECLARATIVE,
+			SENDER_INITIATOR|SENDER_RECEIVER,
+			login->req_buf,
+			payload_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (ret > 0) {
+		if (login->auth_complete) {
+			pr_err("Initiator has already been"
+				" successfully authenticated, but is still"
+				" sending %s keys.\n", param->value);
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_INIT_ERR);
+			return -1;
+		}
+
+		goto do_auth;
+	}
+
+	if (login->first_request)
+		if (iscsi_target_check_first_request(conn, login) < 0)
+			return -1;
+
+	ret = iscsi_encode_text_output(
+			PHASE_SECURITY|PHASE_DECLARATIVE,
+			SENDER_TARGET,
+			login->rsp_buf,
+			&login->rsp_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (!iscsi_check_negotiated_keys(conn->param_list)) {
+		if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+		    !strncmp(param->value, NONE, 4)) {
+			pr_err("Initiator sent AuthMethod=None but"
+				" Target is enforcing iSCSI Authentication,"
+					" login failed.\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_AUTH_FAILED);
+			return -1;
+		}
+
+		if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+		    !login->auth_complete)
+			return 0;
+
+		if (strncmp(param->value, NONE, 4) && !login->auth_complete)
+			return 0;
+
+		if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+		    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+					    ISCSI_FLAG_LOGIN_TRANSIT;
+			login->current_stage = 1;
+		}
+	}
+
+	return 0;
+do_auth:
+	return iscsi_target_do_authentication(conn, login);
+}
+
+static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	int ret;
+	u32 payload_length;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	ret = iscsi_decode_text_input(
+			PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+			SENDER_INITIATOR|SENDER_RECEIVER,
+			login->req_buf,
+			payload_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (login->first_request)
+		if (iscsi_target_check_first_request(conn, login) < 0)
+			return -1;
+
+	if (iscsi_target_check_for_existing_instances(conn, login) < 0)
+		return -1;
+
+	ret = iscsi_encode_text_output(
+			PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+			SENDER_TARGET,
+			login->rsp_buf,
+			&login->rsp_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (!login->auth_complete &&
+	     ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) {
+		pr_err("Initiator is requesting CSG: 1, has not been"
+			 " successfully authenticated, and the Target is"
+			" enforcing iSCSI Authentication, login failed.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_AUTH_FAILED);
+		return -1;
+	}
+
+	if (!iscsi_check_negotiated_keys(conn->param_list))
+		if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE3) &&
+		    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT))
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE3 |
+					    ISCSI_FLAG_LOGIN_TRANSIT;
+
+	return 0;
+}
+
+static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	int pdu_count = 0;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
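+	/*
+	 * Process login request PDUs until the initiator transits to the
+	 * Full Feature Phase, an error occurs, or MAX_LOGIN_PDUS is hit.
+	 */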
+	while (1) {
+		if (++pdu_count > MAX_LOGIN_PDUS) {
+			pr_err("MAX_LOGIN_PDUS count reached.\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+					ISCSI_LOGIN_STATUS_TARGET_ERROR);
+			return -1;
+		}
+
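+		/* The CurrentStageGroup (CSG) occupies bits 2-3 of the flags byte */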
+		switch ((login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2) {
+		case 0:
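+			/* CSG 0 (SecurityNegotiation): the response stage bits stay zero */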
+			login_rsp->flags |= (0 & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK);
+			if (iscsi_target_handle_csg_zero(conn, login) < 0)
+				return -1;
+			break;
+		case 1:
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_CURRENT_STAGE1;
+			if (iscsi_target_handle_csg_one(conn, login) < 0)
+				return -1;
+			if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+				login->tsih = conn->sess->tsih;
+				if (iscsi_target_do_tx_login_io(conn,
+						login) < 0)
+					return -1;
+				return 0;
+			}
+			break;
+		default:
+			pr_err("Illegal CSG: %d received from"
+				" Initiator, protocol error.\n",
+				(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
+				>> 2);
+			break;
+		}
+
+		if (iscsi_target_do_login_io(conn, login) < 0)
+			return -1;
+
+		if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+			login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT;
+			login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
+		}
+	}
+
+	return 0;
+}
+
+static void iscsi_initiatorname_tolower(
+	char *param_buf)
+{
+	char *c;
+	u32 iqn_size = strlen(param_buf), i;
+
+	for (i = 0; i < iqn_size; i++) {
+		c = (char *)&param_buf[i];
+		if (!isupper(*c))
+			continue;
+
+		*c = tolower(*c);
+	}
+}
+
+/*
+ * Processes the first Login Request.
+ */
+static int iscsi_target_locate_portal(
+	struct iscsi_np *np,
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	char *i_buf = NULL, *s_buf = NULL, *t_buf = NULL;
+	char *tmpbuf, *start = NULL, *end = NULL, *key, *value;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_tiqn *tiqn;
+	struct iscsi_login_req *login_req;
+	struct iscsi_targ_login_rsp *login_rsp;
+	u32 payload_length;
+	int sessiontype = 0, ret = 0;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_targ_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	login->first_request	= 1;
+	login->leading_connection = (!login_req->tsih) ? 1 : 0;
+	login->current_stage	=
+		(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	login->version_min	= login_req->min_version;
+	login->version_max	= login_req->max_version;
+	memcpy(login->isid, login_req->isid, 6);
+	login->cmd_sn		= login_req->cmdsn;
+	login->init_task_tag	= login_req->itt;
+	login->initial_exp_statsn = login_req->exp_statsn;
+	login->cid		= login_req->cid;
+	login->tsih		= login_req->tsih;
+
+	if (iscsi_target_get_initial_payload(conn, login) < 0)
+		return -1;
+
+	tmpbuf = kzalloc(payload_length + 1, GFP_KERNEL);
+	if (!tmpbuf) {
+		pr_err("Unable to allocate memory for tmpbuf.\n");
+		return -1;
+	}
+
+	memcpy(tmpbuf, login->req_buf, payload_length);
+	tmpbuf[payload_length] = '\0';
+	start = tmpbuf;
+	end = (start + payload_length);
+
+	/*
+	 * Locate the initial keys expected from the Initiator node in
+	 * the first login request in order to progress with the login phase.
+	 */
+	while (start < end) {
+		if (iscsi_extract_key_value(start, &key, &value) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		if (!strncmp(key, "InitiatorName", 13))
+			i_buf = value;
+		else if (!strncmp(key, "SessionType", 11))
+			s_buf = value;
+		else if (!strncmp(key, "TargetName", 10))
+			t_buf = value;
+
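+		/* Skip key, value, and the two NUL separators (one replaced the '=') */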
+		start += strlen(key) + strlen(value) + 2;
+	}
+
+	/*
+	 * See 5.3.  Login Phase.
+	 */
+	if (!i_buf) {
+		pr_err("InitiatorName key not received"
+			" in first login request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * Convert the incoming InitiatorName to lowercase, following
+	 * RFC-3720 section 3.2.6.1 c), which states that iSCSI IQNs
+	 * are NOT case sensitive.
+	 */
+	iscsi_initiatorname_tolower(i_buf);
+
+	if (!s_buf) {
+		if (!login->leading_connection)
+			goto get_target;
+
+		pr_err("SessionType key not received"
+			" in first login request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Use the default portal group for discovery sessions. Note that
+	 * strncmp() returns zero on a match, so !sessiontype below means
+	 * SessionType=Discovery.
+	 */
+	sessiontype = strncmp(s_buf, DISCOVERY, 9);
+	if (!sessiontype) {
+		conn->tpg = iscsit_global->discovery_tpg;
+		if (!login->leading_connection)
+			goto get_target;
+
+		sess->sess_ops->SessionType = 1;
+		/*
+		 * Setup crc32c modules from libcrypto
+		 */
+		if (iscsi_login_setup_crypto(conn) < 0) {
+			pr_err("iscsi_login_setup_crypto() failed\n");
+			ret = -1;
+			goto out;
+		}
+		/*
+		 * Serialize access across the discovery struct iscsi_portal_group to
+		 * process login attempt.
+		 */
+		if (iscsit_access_np(np, conn->tpg) < 0) {
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+			ret = -1;
+			goto out;
+		}
+		ret = 0;
+		goto out;
+	}
+
+get_target:
+	if (!t_buf) {
+		pr_err("TargetName key not received"
+			" in first login request while"
+			" SessionType=Normal.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Locate Target IQN from Storage Node.
+	 */
+	tiqn = iscsit_get_tiqn_for_login(t_buf);
+	if (!tiqn) {
+		pr_err("Unable to locate Target IQN: %s in"
+			" Storage Node\n", t_buf);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		ret = -1;
+		goto out;
+	}
+	pr_debug("Located Storage Object: %s\n", tiqn->tiqn);
+
+	/*
+	 * Locate Target Portal Group from Storage Node.
+	 */
+	conn->tpg = iscsit_get_tpg_from_np(tiqn, np);
+	if (!conn->tpg) {
+		pr_err("Unable to locate Target Portal Group"
+				" on %s\n", tiqn->tiqn);
+		iscsit_put_tiqn_for_login(tiqn);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		ret = -1;
+		goto out;
+	}
+	pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt);
+	/*
+	 * Setup crc32c modules from libcrypto
+	 */
+	if (iscsi_login_setup_crypto(conn) < 0) {
+		pr_err("iscsi_login_setup_crypto() failed\n");
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * Serialize access across the struct iscsi_portal_group to
+	 * process login attempt.
+	 */
+	if (iscsit_access_np(np, conn->tpg) < 0) {
+		iscsit_put_tiqn_for_login(tiqn);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		ret = -1;
+		conn->tpg = NULL;
+		goto out;
+	}
+
+	/*
+	 * conn->sess->node_acl will be set when the referenced
+	 * struct iscsi_session is located from received ISID+TSIH in
+	 * iscsi_login_non_zero_tsih_s2().
+	 */
+	if (!login->leading_connection) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * This value is required in iscsi_login_zero_tsih_s2()
+	 */
+	sess->sess_ops->SessionType = 0;
+
+	/*
+	 * Locate incoming Initiator IQN reference from Storage Node.
+	 */
+	sess->se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
+			&conn->tpg->tpg_se_tpg, i_buf);
+	if (!sess->se_sess->se_node_acl) {
+		pr_err("iSCSI Initiator Node: %s is not authorized to"
+			" access iSCSI target portal group: %hu.\n",
+				i_buf, conn->tpg->tpgt);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_TGT_FORBIDDEN);
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	kfree(tmpbuf);
+	return ret;
+}
+
+struct iscsi_login *iscsi_target_init_negotiation(
+	struct iscsi_np *np,
+	struct iscsi_conn *conn,
+	char *login_pdu)
+{
+	struct iscsi_login *login;
+
+	login = kzalloc(sizeof(struct iscsi_login), GFP_KERNEL);
+	if (!login) {
+		pr_err("Unable to allocate memory for struct iscsi_login.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		goto out;
+	}
+
+	login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+	if (!login->req) {
+		pr_err("Unable to allocate memory for Login Request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		goto out;
+	}
+	memcpy(login->req, login_pdu, ISCSI_HDR_LEN);
+
+	login->req_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+	if (!login->req_buf) {
+		pr_err("Unable to allocate memory for response buffer.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		goto out;
+	}
+	/*
+	 * SessionType: Discovery
+	 *
+	 *	Locates Default Portal
+	 *
+	 * SessionType: Normal
+	 *
+	 *	Locates Target Portal from NP -> Target IQN
+	 */
+	if (iscsi_target_locate_portal(np, conn, login) < 0) {
+		pr_err("iSCSI Login negotiation failed.\n");
+		goto out;
+	}
+
+	return login;
+out:
+	if (login) {
+		kfree(login->req);
+		kfree(login->req_buf);
+		kfree(login);
+	}
+
+	return NULL;
+}
+
+int iscsi_target_start_negotiation(
+	struct iscsi_login *login,
+	struct iscsi_conn *conn)
+{
+	int ret = -1;
+
+	login->rsp = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+	if (!login->rsp) {
+		pr_err("Unable to allocate memory for"
+				" Login Response.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		ret = -1;
+		goto out;
+	}
+
+	login->rsp_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+	if (!login->rsp_buf) {
+		pr_err("Unable to allocate memory for"
+			" request buffer.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		ret = -1;
+		goto out;
+	}
+
+	ret = iscsi_target_do_login(conn, login);
+out:
+	if (ret != 0)
+		iscsi_remove_failed_auth_entry(conn);
+
+	iscsi_target_nego_release(login, conn);
+	return ret;
+}
+
+void iscsi_target_nego_release(
+	struct iscsi_login *login,
+	struct iscsi_conn *conn)
+{
+	kfree(login->req);
+	kfree(login->rsp);
+	kfree(login->req_buf);
+	kfree(login->rsp_buf);
+	kfree(login);
+}
diff --git a/drivers/target/iscsi/iscsi_target_nego.h b/drivers/target/iscsi/iscsi_target_nego.h
new file mode 100644
index 0000000..92e133a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nego.h
@@ -0,0 +1,17 @@
+#ifndef ISCSI_TARGET_NEGO_H
+#define ISCSI_TARGET_NEGO_H
+
+#define DECIMAL         0
+#define HEX             1
+
+extern void convert_null_to_semi(char *, int);
+extern int extract_param(const char *, const char *, unsigned int, char *,
+		unsigned char *);
+extern struct iscsi_login *iscsi_target_init_negotiation(
+		struct iscsi_np *, struct iscsi_conn *, char *);
+extern int iscsi_target_start_negotiation(
+		struct iscsi_login *, struct iscsi_conn *);
+extern void iscsi_target_nego_release(
+		struct iscsi_login *, struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_NEGO_H */
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c
new file mode 100644
index 0000000..aeafbe0
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c
@@ -0,0 +1,263 @@
+/*******************************************************************************
+ * This file contains the main functions related to Initiator Node Attributes.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_nodeattrib.h"
+
+static inline char *iscsit_na_get_initiatorname(
+	struct iscsi_node_acl *nacl)
+{
+	struct se_node_acl *se_nacl = &nacl->se_node_acl;
+
+	return &se_nacl->initiatorname[0];
+}
+
+void iscsit_set_default_node_attribues(
+	struct iscsi_node_acl *acl)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	a->dataout_timeout = NA_DATAOUT_TIMEOUT;
+	a->dataout_timeout_retries = NA_DATAOUT_TIMEOUT_RETRIES;
+	a->nopin_timeout = NA_NOPIN_TIMEOUT;
+	a->nopin_response_timeout = NA_NOPIN_RESPONSE_TIMEOUT;
+	a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS;
+	a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS;
+	a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS;
+	a->default_erl = NA_DEFAULT_ERL;
+}
+
+int iscsit_na_dataout_timeout(
+	struct iscsi_node_acl *acl,
+	u32 dataout_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (dataout_timeout > NA_DATAOUT_TIMEOUT_MAX) {
+		pr_err("Requested DataOut Timeout %u larger than"
+			" maximum %u\n", dataout_timeout,
+			NA_DATAOUT_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (dataout_timeout < NA_DATAOUT_TIMEOUT_MIX) {
+		pr_err("Requested DataOut Timeout %u smaller than"
+			" minimum %u\n", dataout_timeout,
+			NA_DATAOUT_TIMEOUT_MIX);
+		return -EINVAL;
+	}
+
+	a->dataout_timeout = dataout_timeout;
+	pr_debug("Set DataOut Timeout to %u for Initiator Node"
+		" %s\n", a->dataout_timeout, iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+int iscsit_na_dataout_timeout_retries(
+	struct iscsi_node_acl *acl,
+	u32 dataout_timeout_retries)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (dataout_timeout_retries > NA_DATAOUT_TIMEOUT_RETRIES_MAX) {
+		pr_err("Requested DataOut Timeout Retries %u larger"
+			" than maximum %u", dataout_timeout_retries,
+				NA_DATAOUT_TIMEOUT_RETRIES_MAX);
+		return -EINVAL;
+	} else if (dataout_timeout_retries < NA_DATAOUT_TIMEOUT_RETRIES_MIN) {
+		pr_err("Requested DataOut Timeout Retries %u smaller"
+			" than minimum %u", dataout_timeout_retries,
+				NA_DATAOUT_TIMEOUT_RETRIES_MIN);
+		return -EINVAL;
+	}
+
+	a->dataout_timeout_retries = dataout_timeout_retries;
+	pr_debug("Set DataOut Timeout Retries to %u for"
+		" Initiator Node %s\n", a->dataout_timeout_retries,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+int iscsit_na_nopin_timeout(
+	struct iscsi_node_acl *acl,
+	u32 nopin_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+	struct iscsi_session *sess;
+	struct iscsi_conn *conn;
+	struct se_node_acl *se_nacl = &a->nacl->se_node_acl;
+	struct se_session *se_sess;
+	u32 orig_nopin_timeout = a->nopin_timeout;
+
+	if (nopin_timeout > NA_NOPIN_TIMEOUT_MAX) {
+		pr_err("Requested NopIn Timeout %u larger than maximum"
+			" %u\n", nopin_timeout, NA_NOPIN_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if ((nopin_timeout < NA_NOPIN_TIMEOUT_MIN) &&
+		   (nopin_timeout != 0)) {
+		pr_err("Requested NopIn Timeout %u smaller than"
+			" minimum %u and not 0\n", nopin_timeout,
+			NA_NOPIN_TIMEOUT_MIN);
+		return -EINVAL;
+	}
+
+	a->nopin_timeout = nopin_timeout;
+	pr_debug("Set NopIn Timeout to %u for Initiator"
+		" Node %s\n", a->nopin_timeout,
+		iscsit_na_get_initiatorname(acl));
+	/*
+	 * Reenable disabled nopin_timeout timer for all iSCSI connections.
+	 */
+	if (!orig_nopin_timeout) {
+		spin_lock_bh(&se_nacl->nacl_sess_lock);
+		se_sess = se_nacl->nacl_sess;
+		if (se_sess) {
+			sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+
+			spin_lock(&sess->conn_lock);
+			list_for_each_entry(conn, &sess->sess_conn_list,
+					conn_list) {
+				if (conn->conn_state !=
+						TARG_CONN_STATE_LOGGED_IN)
+					continue;
+
+				spin_lock(&conn->nopin_timer_lock);
+				__iscsit_start_nopin_timer(conn);
+				spin_unlock(&conn->nopin_timer_lock);
+			}
+			spin_unlock(&sess->conn_lock);
+		}
+		spin_unlock_bh(&se_nacl->nacl_sess_lock);
+	}
+
+	return 0;
+}
+
+int iscsit_na_nopin_response_timeout(
+	struct iscsi_node_acl *acl,
+	u32 nopin_response_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (nopin_response_timeout > NA_NOPIN_RESPONSE_TIMEOUT_MAX) {
+		pr_err("Requested NopIn Response Timeout %u larger"
+			" than maximum %u\n", nopin_response_timeout,
+				NA_NOPIN_RESPONSE_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (nopin_response_timeout < NA_NOPIN_RESPONSE_TIMEOUT_MIN) {
+		pr_err("Requested NopIn Response Timeout %u smaller"
+			" than minimum %u\n", nopin_response_timeout,
+				NA_NOPIN_RESPONSE_TIMEOUT_MIN);
+		return -EINVAL;
+	}
+
+	a->nopin_response_timeout = nopin_response_timeout;
+	pr_debug("Set NopIn Response Timeout to %u for"
+		" Initiator Node %s\n", a->nopin_timeout,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+int iscsit_na_random_datain_pdu_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_datain_pdu_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_datain_pdu_offsets != 0 && random_datain_pdu_offsets != 1) {
+		pr_err("Requested Random DataIN PDU Offsets: %u not"
+			" 0 or 1\n", random_datain_pdu_offsets);
+		return -EINVAL;
+	}
+
+	a->random_datain_pdu_offsets = random_datain_pdu_offsets;
+	pr_debug("Set Random DataIN PDU Offsets to %u for"
+		" Initiator Node %s\n", a->random_datain_pdu_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+int iscsit_na_random_datain_seq_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_datain_seq_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_datain_seq_offsets != 0 && random_datain_seq_offsets != 1) {
+		pr_err("Requested Random DataIN Sequence Offsets: %u"
+			" not 0 or 1\n", random_datain_seq_offsets);
+		return -EINVAL;
+	}
+
+	a->random_datain_seq_offsets = random_datain_seq_offsets;
+	pr_debug("Set Random DataIN Sequence Offsets to %u for"
+		" Initiator Node %s\n", a->random_datain_seq_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+int iscsit_na_random_r2t_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_r2t_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_r2t_offsets != 0 && random_r2t_offsets != 1) {
+		pr_err("Requested Random R2T Offsets: %u not"
+			" 0 or 1\n", random_r2t_offsets);
+		return -EINVAL;
+	}
+
+	a->random_r2t_offsets = random_r2t_offsets;
+	pr_debug("Set Random R2T Offsets to %u for"
+		" Initiator Node %s\n", a->random_r2t_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+int iscsit_na_default_erl(
+	struct iscsi_node_acl *acl,
+	u32 default_erl)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (default_erl != 0 && default_erl != 1 && default_erl != 2) {
+		pr_err("Requested default ERL: %u not 0, 1, or 2\n",
+				default_erl);
+		return -EINVAL;
+	}
+
+	a->default_erl = default_erl;
+	pr_debug("Set use ERL0 flag to %u for Initiator"
+		" Node %s\n", a->default_erl,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h
new file mode 100644
index 0000000..c970b326
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_NODEATTRIB_H
+#define ISCSI_TARGET_NODEATTRIB_H
+
+extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *);
+extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_response_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_pdu_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_seq_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_r2t_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_default_erl(struct iscsi_node_acl *, u32);
+
+#endif /* ISCSI_TARGET_NODEATTRIB_H */
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
new file mode 100644
index 0000000..252e246
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -0,0 +1,1905 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/slab.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_parameters.h"
+
+int iscsi_login_rx_data(
+	struct iscsi_conn *conn,
+	char *buf,
+	int length)
+{
+	int rx_got;
+	struct kvec iov;
+
+	memset(&iov, 0, sizeof(struct kvec));
+	iov.iov_len	= length;
+	iov.iov_base	= buf;
+
+	/*
+	 * Initial Marker-less Interval.
+	 * Add the values regardless of IFMarker/OFMarker, considering
+	 * they may not be negotiated yet.
+	 */
+	conn->of_marker += length;
+
+	rx_got = rx_data(conn, &iov, 1, length);
+	if (rx_got != length) {
+		pr_err("rx_data returned %d, expecting %d.\n",
+				rx_got, length);
+		return -1;
+	}
+
+	return 0;
+}
+
+int iscsi_login_tx_data(
+	struct iscsi_conn *conn,
+	char *pdu_buf,
+	char *text_buf,
+	int text_length)
+{
+	int length, tx_sent;
+	struct kvec iov[2];
+
+	length = (ISCSI_HDR_LEN + text_length);
+
+	memset(&iov[0], 0, 2 * sizeof(struct kvec));
+	iov[0].iov_len		= ISCSI_HDR_LEN;
+	iov[0].iov_base		= pdu_buf;
+	iov[1].iov_len		= text_length;
+	iov[1].iov_base		= text_buf;
+
+	/*
+	 * Initial Marker-less Interval.
+	 * Add the values regardless of IFMarker/OFMarker, considering
+	 * they may not be negotiated yet.
+	 */
+	conn->if_marker += length;
+
+	tx_sent = tx_data(conn, &iov[0], 2, length);
+	if (tx_sent != length) {
+		pr_err("tx_data returned %d, expecting %d.\n",
+				tx_sent, length);
+		return -1;
+	}
+
+	return 0;
+}
+
+void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops)
+{
+	pr_debug("HeaderDigest: %s\n", (conn_ops->HeaderDigest) ?
+				"CRC32C" : "None");
+	pr_debug("DataDigest: %s\n", (conn_ops->DataDigest) ?
+				"CRC32C" : "None");
+	pr_debug("MaxRecvDataSegmentLength: %u\n",
+				conn_ops->MaxRecvDataSegmentLength);
+	pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No");
+	pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? "Yes" : "No");
+	if (conn_ops->OFMarker)
+		pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt);
+	if (conn_ops->IFMarker)
+		pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt);
+}
+
+void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops)
+{
+	pr_debug("InitiatorName: %s\n", sess_ops->InitiatorName);
+	pr_debug("InitiatorAlias: %s\n", sess_ops->InitiatorAlias);
+	pr_debug("TargetName: %s\n", sess_ops->TargetName);
+	pr_debug("TargetAlias: %s\n", sess_ops->TargetAlias);
+	pr_debug("TargetPortalGroupTag: %hu\n",
+			sess_ops->TargetPortalGroupTag);
+	pr_debug("MaxConnections: %hu\n", sess_ops->MaxConnections);
+	pr_debug("InitialR2T: %s\n",
+			(sess_ops->InitialR2T) ? "Yes" : "No");
+	pr_debug("ImmediateData: %s\n", (sess_ops->ImmediateData) ?
+			"Yes" : "No");
+	pr_debug("MaxBurstLength: %u\n", sess_ops->MaxBurstLength);
+	pr_debug("FirstBurstLength: %u\n", sess_ops->FirstBurstLength);
+	pr_debug("DefaultTime2Wait: %hu\n", sess_ops->DefaultTime2Wait);
+	pr_debug("DefaultTime2Retain: %hu\n",
+			sess_ops->DefaultTime2Retain);
+	pr_debug("MaxOutstandingR2T: %hu\n",
+			sess_ops->MaxOutstandingR2T);
+	pr_debug("DataPDUInOrder: %s\n",
+			(sess_ops->DataPDUInOrder) ? "Yes" : "No");
+	pr_debug("DataSequenceInOrder: %s\n",
+			(sess_ops->DataSequenceInOrder) ? "Yes" : "No");
+	pr_debug("ErrorRecoveryLevel: %hu\n",
+			sess_ops->ErrorRecoveryLevel);
+	pr_debug("SessionType: %s\n", (sess_ops->SessionType) ?
+			"Discovery" : "Normal");
+}
+
+void iscsi_print_params(struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param;
+
+	list_for_each_entry(param, &param_list->param_list, p_list)
+		pr_debug("%s: %s\n", param->name, param->value);
+}
+
+static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *param_list,
+		char *name, char *value, u8 phase, u8 scope, u8 sender,
+		u16 type_range, u8 use)
+{
+	struct iscsi_param *param = NULL;
+
+	param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
+	if (!param) {
+		pr_err("Unable to allocate memory for parameter.\n");
+		goto out;
+	}
+	INIT_LIST_HEAD(&param->p_list);
+
+	param->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+	if (!param->name) {
+		pr_err("Unable to allocate memory for parameter name.\n");
+		goto out;
+	}
+
+	param->value = kzalloc(strlen(value) + 1, GFP_KERNEL);
+	if (!param->value) {
+		pr_err("Unable to allocate memory for parameter value.\n");
+		goto out;
+	}
+
+	memcpy(param->name, name, strlen(name));
+	param->name[strlen(name)] = '\0';
+	memcpy(param->value, value, strlen(value));
+	param->value[strlen(value)] = '\0';
+	param->phase		= phase;
+	param->scope		= scope;
+	param->sender		= sender;
+	param->use		= use;
+	param->type_range	= type_range;
+
+	switch (param->type_range) {
+	case TYPERANGE_BOOL_AND:
+		param->type = TYPE_BOOL_AND;
+		break;
+	case TYPERANGE_BOOL_OR:
+		param->type = TYPE_BOOL_OR;
+		break;
+	case TYPERANGE_0_TO_2:
+	case TYPERANGE_0_TO_3600:
+	case TYPERANGE_0_TO_32767:
+	case TYPERANGE_0_TO_65535:
+	case TYPERANGE_1_TO_65535:
+	case TYPERANGE_2_TO_3600:
+	case TYPERANGE_512_TO_16777215:
+		param->type = TYPE_NUMBER;
+		break;
+	case TYPERANGE_AUTH:
+	case TYPERANGE_DIGEST:
+		param->type = TYPE_VALUE_LIST | TYPE_STRING;
+		break;
+	case TYPERANGE_MARKINT:
+		param->type = TYPE_NUMBER_RANGE;
+		param->type_range |= TYPERANGE_1_TO_65535;
+		break;
+	case TYPERANGE_ISCSINAME:
+	case TYPERANGE_SESSIONTYPE:
+	case TYPERANGE_TARGETADDRESS:
+	case TYPERANGE_UTF8:
+		param->type = TYPE_STRING;
+		break;
+	default:
+		pr_err("Unknown type_range 0x%02x\n",
+				param->type_range);
+		goto out;
+	}
+	list_add_tail(&param->p_list, &param_list->param_list);
+
+	return param;
+out:
+	if (param) {
+		kfree(param->value);
+		kfree(param->name);
+		kfree(param);
+	}
+
+	return NULL;
+}
+
+/* #warning Add extension keys */
+int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr)
+{
+	struct iscsi_param *param = NULL;
+	struct iscsi_param_list *pl;
+
+	pl = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
+	if (!pl) {
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_param_list.\n");
+		return -1;
+	}
+	INIT_LIST_HEAD(&pl->param_list);
+	INIT_LIST_HEAD(&pl->extra_response_list);
+
+	/*
+	 * The format for setting the initial parameter definitions is:
+	 *
+	 * Parameter name:
+	 * Initial value:
+	 * Allowable phase:
+	 * Scope:
+	 * Allowable senders:
+	 * Typerange:
+	 * Use:
+	 */
+	param = iscsi_set_default_param(pl, AUTHMETHOD, INITIAL_AUTHMETHOD,
+			PHASE_SECURITY, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_AUTH, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, HEADERDIGEST, INITIAL_HEADERDIGEST,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_DIGEST, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, DATADIGEST, INITIAL_DATADIGEST,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_DIGEST, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, MAXCONNECTIONS,
+			INITIAL_MAXCONNECTIONS, PHASE_OPERATIONAL,
+			SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_1_TO_65535, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, SENDTARGETS, INITIAL_SENDTARGETS,
+			PHASE_FFP0, SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+			TYPERANGE_UTF8, 0);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, TARGETNAME, INITIAL_TARGETNAME,
+			PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_ISCSINAME, USE_ALL);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, INITIATORNAME,
+			INITIAL_INITIATORNAME, PHASE_DECLARATIVE,
+			SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+			TYPERANGE_ISCSINAME, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, TARGETALIAS, INITIAL_TARGETALIAS,
+			PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET,
+			TYPERANGE_UTF8, USE_ALL);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, INITIATORALIAS,
+			INITIAL_INITIATORALIAS, PHASE_DECLARATIVE,
+			SCOPE_SESSION_WIDE, SENDER_INITIATOR, TYPERANGE_UTF8,
+			USE_ALL);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, TARGETADDRESS,
+			INITIAL_TARGETADDRESS, PHASE_DECLARATIVE,
+			SCOPE_SESSION_WIDE, SENDER_TARGET,
+			TYPERANGE_TARGETADDRESS, USE_ALL);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, TARGETPORTALGROUPTAG,
+			INITIAL_TARGETPORTALGROUPTAG,
+			PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET,
+			TYPERANGE_0_TO_65535, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, INITIALR2T, INITIAL_INITIALR2T,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_BOOL_OR, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, IMMEDIATEDATA,
+			INITIAL_IMMEDIATEDATA, PHASE_OPERATIONAL,
+			SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_AND,
+			USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, MAXRECVDATASEGMENTLENGTH,
+			INITIAL_MAXRECVDATASEGMENTLENGTH,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_512_TO_16777215, USE_ALL);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, MAXBURSTLENGTH,
+			INITIAL_MAXBURSTLENGTH, PHASE_OPERATIONAL,
+			SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_512_TO_16777215, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, FIRSTBURSTLENGTH,
+			INITIAL_FIRSTBURSTLENGTH,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_512_TO_16777215, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, DEFAULTTIME2WAIT,
+			INITIAL_DEFAULTTIME2WAIT,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_0_TO_3600, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, DEFAULTTIME2RETAIN,
+			INITIAL_DEFAULTTIME2RETAIN,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_0_TO_3600, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, MAXOUTSTANDINGR2T,
+			INITIAL_MAXOUTSTANDINGR2T,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_1_TO_65535, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, DATAPDUINORDER,
+			INITIAL_DATAPDUINORDER, PHASE_OPERATIONAL,
+			SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_OR,
+			USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, DATASEQUENCEINORDER,
+			INITIAL_DATASEQUENCEINORDER,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_BOOL_OR, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, ERRORRECOVERYLEVEL,
+			INITIAL_ERRORRECOVERYLEVEL,
+			PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+			TYPERANGE_0_TO_2, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, SESSIONTYPE, INITIAL_SESSIONTYPE,
+			PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+			TYPERANGE_SESSIONTYPE, USE_LEADING_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, IFMARKER, INITIAL_IFMARKER,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_BOOL_AND, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, OFMARKER, INITIAL_OFMARKER,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_BOOL_AND, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, IFMARKINT, INITIAL_IFMARKINT,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_MARKINT, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT,
+			PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+			TYPERANGE_MARKINT, USE_INITIAL_ONLY);
+	if (!param)
+		goto out;
+
+	*param_list_ptr = pl;
+	return 0;
+out:
+	iscsi_release_param_list(pl);
+	return -1;
+}
+
+int iscsi_set_keys_to_negotiate(
+	int sessiontype,
+	struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param;
+
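+	/*
+	 * Mark each key the target will actively negotiate; purely
+	 * declarative keys such as TargetName, InitiatorName and
+	 * InitiatorAlias are skipped.
+	 */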
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		param->state = 0;
+		if (!strcmp(param->name, AUTHMETHOD)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, HEADERDIGEST)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, DATADIGEST)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, MAXCONNECTIONS)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, TARGETNAME)) {
+			continue;
+		} else if (!strcmp(param->name, INITIATORNAME)) {
+			continue;
+		} else if (!strcmp(param->name, TARGETALIAS)) {
+			if (param->value)
+				SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, INITIATORALIAS)) {
+			continue;
+		} else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, INITIALR2T)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, IMMEDIATEDATA)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, MAXBURSTLENGTH)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, DATAPDUINORDER)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, DATASEQUENCEINORDER)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, SESSIONTYPE)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, IFMARKER)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, OFMARKER)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, IFMARKINT)) {
+			SET_PSTATE_NEGOTIATE(param);
+		} else if (!strcmp(param->name, OFMARKINT)) {
+			SET_PSTATE_NEGOTIATE(param);
+		}
+	}
+
+	return 0;
+}
+
+int iscsi_set_keys_irrelevant_for_discovery(
+	struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param;
+
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!strcmp(param->name, MAXCONNECTIONS))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, INITIALR2T))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, IMMEDIATEDATA))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, MAXBURSTLENGTH))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, FIRSTBURSTLENGTH))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, MAXOUTSTANDINGR2T))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, DATAPDUINORDER))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, DATASEQUENCEINORDER))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, ERRORRECOVERYLEVEL))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, DEFAULTTIME2WAIT))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, DEFAULTTIME2RETAIN))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, IFMARKER))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, OFMARKER))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, IFMARKINT))
+			param->state &= ~PSTATE_NEGOTIATE;
+		else if (!strcmp(param->name, OFMARKINT))
+			param->state &= ~PSTATE_NEGOTIATE;
+	}
+
+	return 0;
+}
+
+int iscsi_copy_param_list(
+	struct iscsi_param_list **dst_param_list,
+	struct iscsi_param_list *src_param_list,
+	int leading)
+{
+	struct iscsi_param *new_param = NULL, *param = NULL;
+	struct iscsi_param_list *param_list = NULL;
+
+	param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
+	if (!param_list) {
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_param_list.\n");
+		goto err_out;
+	}
+	INIT_LIST_HEAD(&param_list->param_list);
+	INIT_LIST_HEAD(&param_list->extra_response_list);
+
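+	/*
+	 * For non-leading connections, copy only connection-scoped keys
+	 * plus TargetName, InitiatorName and TargetPortalGroupTag.
+	 */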
+	list_for_each_entry(param, &src_param_list->param_list, p_list) {
+		if (!leading && (param->scope & SCOPE_SESSION_WIDE)) {
+			if ((strcmp(param->name, "TargetName") != 0) &&
+			    (strcmp(param->name, "InitiatorName") != 0) &&
+			    (strcmp(param->name, "TargetPortalGroupTag") != 0))
+				continue;
+		}
+
+		new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
+		if (!new_param) {
+			pr_err("Unable to allocate memory for"
+				" struct iscsi_param.\n");
+			goto err_out;
+		}
+
+		new_param->set_param = param->set_param;
+		new_param->phase = param->phase;
+		new_param->scope = param->scope;
+		new_param->sender = param->sender;
+		new_param->type = param->type;
+		new_param->use = param->use;
+		new_param->type_range = param->type_range;
+
+		new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
+		if (!new_param->name) {
+			pr_err("Unable to allocate memory for"
+				" parameter name.\n");
+			goto err_out;
+		}
+
+		new_param->value = kzalloc(strlen(param->value) + 1,
+				GFP_KERNEL);
+		if (!new_param->value) {
+			pr_err("Unable to allocate memory for"
+				" parameter value.\n");
+			goto err_out;
+		}
+
+		memcpy(new_param->name, param->name, strlen(param->name));
+		new_param->name[strlen(param->name)] = '\0';
+		memcpy(new_param->value, param->value, strlen(param->value));
+		new_param->value[strlen(param->value)] = '\0';
+
+		list_add_tail(&new_param->p_list, &param_list->param_list);
+	}
+
+	if (!list_empty(&param_list->param_list))
+		*dst_param_list = param_list;
+	else {
+		pr_err("No parameters allocated.\n");
+		goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	iscsi_release_param_list(param_list);
+	return -1;
+}
+
+static void iscsi_release_extra_responses(struct iscsi_param_list *param_list)
+{
+	struct iscsi_extra_response *er, *er_tmp;
+
+	list_for_each_entry_safe(er, er_tmp, &param_list->extra_response_list,
+			er_list) {
+		list_del(&er->er_list);
+		kfree(er);
+	}
+}
+
+void iscsi_release_param_list(struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param, *param_tmp;
+
+	list_for_each_entry_safe(param, param_tmp, &param_list->param_list,
+			p_list) {
+		list_del(&param->p_list);
+
+		kfree(param->name);
+		param->name = NULL;
+		kfree(param->value);
+		param->value = NULL;
+		kfree(param);
+		param = NULL;
+	}
+
+	iscsi_release_extra_responses(param_list);
+
+	kfree(param_list);
+}
+
+struct iscsi_param *iscsi_find_param_from_key(
+	char *key,
+	struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param;
+
+	if (!key || !param_list) {
+		pr_err("Key or parameter list pointer is NULL.\n");
+		return NULL;
+	}
+
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!strcmp(key, param->name))
+			return param;
+	}
+
+	pr_err("Unable to locate key \"%s\".\n", key);
+	return NULL;
+}
+
+int iscsi_extract_key_value(char *textbuf, char **key, char **value)
+{
+	*value = strchr(textbuf, '=');
+	if (!*value) {
+		pr_err("Unable to locate \"=\" seperator for key,"
+				" ignoring request.\n");
+		return -1;
+	}
+
+	*key = textbuf;
+	**value = '\0';
+	*value = *value + 1;
+
+	return 0;
+}
+
+int iscsi_update_param_value(struct iscsi_param *param, char *value)
+{
+	kfree(param->value);
+
+	param->value = kzalloc(strlen(value) + 1, GFP_KERNEL);
+	if (!param->value) {
+		pr_err("Unable to allocate memory for value.\n");
+		return -1;
+	}
+
+	memcpy(param->value, value, strlen(value));
+	param->value[strlen(value)] = '\0';
+
+	pr_debug("iSCSI Parameter updated to %s=%s\n",
+			param->name, param->value);
+	return 0;
+}
+
+static int iscsi_add_notunderstood_response(
+	char *key,
+	char *value,
+	struct iscsi_param_list *param_list)
+{
+	struct iscsi_extra_response *extra_response;
+
+	if (strlen(value) > VALUE_MAXLEN) {
+		pr_err("Value for notunderstood key \"%s\" exceeds %d,"
+			" protocol error.\n", key, VALUE_MAXLEN);
+		return -1;
+	}
+
+	extra_response = kzalloc(sizeof(struct iscsi_extra_response), GFP_KERNEL);
+	if (!extra_response) {
+		pr_err("Unable to allocate memory for"
+			" struct iscsi_extra_response.\n");
+		return -1;
+	}
+	INIT_LIST_HEAD(&extra_response->er_list);
+
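+	/*
+	 * Only the value length is bounds-checked above; the key is assumed
+	 * to fit within the fixed-size key buffer.
+	 */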
+	strncpy(extra_response->key, key, strlen(key) + 1);
+	strncpy(extra_response->value, NOTUNDERSTOOD,
+			strlen(NOTUNDERSTOOD) + 1);
+
+	list_add_tail(&extra_response->er_list,
+			&param_list->extra_response_list);
+	return 0;
+}
+
+static int iscsi_check_for_auth_key(char *key)
+{
+	/*
+	 * CHAP keys, per RFC 1994
+	 */
+	if (!strcmp(key, "CHAP_A") || !strcmp(key, "CHAP_I") ||
+	    !strcmp(key, "CHAP_C") || !strcmp(key, "CHAP_N") ||
+	    !strcmp(key, "CHAP_R"))
+		return 1;
+
+	/*
+	 * SRP keys, per RFC 2945
+	 */
+	if (!strcmp(key, "SRP_U") || !strcmp(key, "SRP_N") ||
+	    !strcmp(key, "SRP_g") || !strcmp(key, "SRP_s") ||
+	    !strcmp(key, "SRP_A") || !strcmp(key, "SRP_B") ||
+	    !strcmp(key, "SRP_M") || !strcmp(key, "SRP_HM"))
+		return 1;
+
+	return 0;
+}
+
+static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
+{
+	if (IS_TYPE_BOOL_AND(param)) {
+		if (!strcmp(param->value, NO))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+	} else if (IS_TYPE_BOOL_OR(param)) {
+		if (!strcmp(param->value, YES))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+		 /*
+		  * Required for gPXE iSCSI boot client
+		  */
+		if (!strcmp(param->name, IMMEDIATEDATA))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+	} else if (IS_TYPE_NUMBER(param)) {
+		if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+		/*
+		 * The GlobalSAN iSCSI Initiator for MacOSX does
+		 * not respond to MaxBurstLength, FirstBurstLength,
+		 * DefaultTime2Wait or DefaultTime2Retain parameter keys.
+		 * So, we set them to 'reply optional' here, and assume the
+		 * defaults from iscsi_parameters.h if the initiator
+		 * is not RFC compliant and the keys are not negotiated.
+		 */
+		if (!strcmp(param->name, MAXBURSTLENGTH))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+		if (!strcmp(param->name, FIRSTBURSTLENGTH))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+		if (!strcmp(param->name, DEFAULTTIME2WAIT))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+		if (!strcmp(param->name, DEFAULTTIME2RETAIN))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+		/*
+		 * Required for gPXE iSCSI boot client
+		 */
+		if (!strcmp(param->name, MAXCONNECTIONS))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+	} else if (IS_PHASE_DECLARATIVE(param))
+		SET_PSTATE_REPLY_OPTIONAL(param);
+}
+
+static int iscsi_check_boolean_value(struct iscsi_param *param, char *value)
+{
+	if (strcmp(value, YES) && strcmp(value, NO)) {
+		pr_err("Illegal value for \"%s\", must be either"
+			" \"%s\" or \"%s\".\n", param->name, YES, NO);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_ptr)
+{
+	char *tmpptr;
+	int value = 0;
+
+	value = simple_strtoul(value_ptr, &tmpptr, 0);
+
+/* #warning FIXME: Fix this */
+#if 0
+	if (strspn(tmpptr, WHITE_SPACE) != strlen(tmpptr)) {
+		pr_err("Illegal value \"%s\" for \"%s\".\n",
+			value_ptr, param->name);
+		return -1;
+	}
+#endif
+	if (IS_TYPERANGE_0_TO_2(param)) {
+		if ((value < 0) || (value > 2)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 0 and 2.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+	if (IS_TYPERANGE_0_TO_3600(param)) {
+		if ((value < 0) || (value > 3600)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 0 and 3600.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+	if (IS_TYPERANGE_0_TO_32767(param)) {
+		if ((value < 0) || (value > 32767)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 0 and 32767.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+	if (IS_TYPERANGE_0_TO_65535(param)) {
+		if ((value < 0) || (value > 65535)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 0 and 65535.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+	if (IS_TYPERANGE_1_TO_65535(param)) {
+		if ((value < 1) || (value > 65535)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 1 and 65535.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+	if (IS_TYPERANGE_2_TO_3600(param)) {
+		if ((value < 2) || (value > 3600)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 2 and 3600.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+	if (IS_TYPERANGE_512_TO_16777215(param)) {
+		if ((value < 512) || (value > 16777215)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" between 512 and 16777215.\n", param->name);
+			return -1;
+		}
+		return 0;
+	}
+
+	return 0;
+}
+
+static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value)
+{
+	char *left_val_ptr = NULL, *right_val_ptr = NULL;
+	char *tilde_ptr = NULL, *tmp_ptr = NULL;
+	u32 left_val, right_val, local_left_val, local_right_val;
+
+	if (strcmp(param->name, IFMARKINT) &&
+	    strcmp(param->name, OFMARKINT)) {
+		pr_err("Only parameters \"%s\" or \"%s\" may contain a"
+		       " numerical range value.\n", IFMARKINT, OFMARKINT);
+		return -1;
+	}
+
+	if (IS_PSTATE_PROPOSER(param))
+		return 0;
+
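+	/* Numerical range values use the "<low>~<high>" format; split at '~' */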
+	tilde_ptr = strchr(value, '~');
+	if (!tilde_ptr) {
+		pr_err("Unable to locate numerical range indicator"
+			" \"~\" for \"%s\".\n", param->name);
+		return -1;
+	}
+	*tilde_ptr = '\0';
+
+	left_val_ptr = value;
+	right_val_ptr = value + strlen(left_val_ptr) + 1;
+
+	if (iscsi_check_numerical_value(param, left_val_ptr) < 0)
+		return -1;
+	if (iscsi_check_numerical_value(param, right_val_ptr) < 0)
+		return -1;
+
+	left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+	right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+	*tilde_ptr = '~';
+
+	if (right_val < left_val) {
+		pr_err("Numerical range for parameter \"%s\" contains"
+			" a right value which is less than the left.\n",
+				param->name);
+		return -1;
+	}
+
+	/*
+	 * For now, enforce reasonable defaults for [I,O]FMarkInt.
+	 */
+	tilde_ptr = strchr(param->value, '~');
+	if (!tilde_ptr) {
+		pr_err("Unable to locate numerical range indicator"
+			" \"~\" for \"%s\".\n", param->name);
+		return -1;
+	}
+	*tilde_ptr = '\0';
+
+	left_val_ptr = param->value;
+	right_val_ptr = param->value + strlen(left_val_ptr) + 1;
+
+	local_left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+	local_right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+	*tilde_ptr = '~';
+
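+	/*
+	 * When the value is being set locally, reject out-of-range input
+	 * outright; when it arrives from the initiator, answer the key
+	 * with Reject instead of failing immediately.
+	 */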
+	if (param->set_param) {
+		if ((left_val < local_left_val) ||
+		    (right_val < local_left_val)) {
+			pr_err("Passed value range \"%u~%u\" is below"
+				" minimum left value \"%u\" for key \"%s\","
+				" rejecting.\n", left_val, right_val,
+				local_left_val, param->name);
+			return -1;
+		}
+	} else {
+		if ((left_val < local_left_val) &&
+		    (right_val < local_left_val)) {
+			pr_err("Received value range \"%u~%u\" is"
+				" below minimum left value \"%u\" for key"
+				" \"%s\", rejecting.\n", left_val, right_val,
+				local_left_val, param->name);
+			SET_PSTATE_REJECT(param);
+			if (iscsi_update_param_value(param, REJECT) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value)
+{
+	if (IS_PSTATE_PROPOSER(param))
+		return 0;
+
+	if (IS_TYPERANGE_AUTH_PARAM(param)) {
+		if (strcmp(value, KRB5) && strcmp(value, SPKM1) &&
+		    strcmp(value, SPKM2) && strcmp(value, SRP) &&
+		    strcmp(value, CHAP) && strcmp(value, NONE)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" \"%s\", \"%s\", \"%s\", \"%s\", \"%s\""
+				" or \"%s\".\n", param->name, KRB5,
+					SPKM1, SPKM2, SRP, CHAP, NONE);
+			return -1;
+		}
+	}
+	if (IS_TYPERANGE_DIGEST_PARAM(param)) {
+		if (strcmp(value, CRC32C) && strcmp(value, NONE)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" \"%s\" or \"%s\".\n", param->name,
+					CRC32C, NONE);
+			return -1;
+		}
+	}
+	if (IS_TYPERANGE_SESSIONTYPE(param)) {
+		if (strcmp(value, DISCOVERY) && strcmp(value, NORMAL)) {
+			pr_err("Illegal value for \"%s\", must be"
+				" \"%s\" or \"%s\".\n", param->name,
+					DISCOVERY, NORMAL);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ *	This function picks a value from a numerical range; currently it
+ *	simply returns the lesser of the two right values.
+ */
+static char *iscsi_get_value_from_number_range(
+	struct iscsi_param *param,
+	char *value)
+{
+	char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL;
+	u32 acceptor_right_value, proposer_right_value;
+
+	tilde_ptr1 = strchr(value, '~');
+	if (!tilde_ptr1)
+		return NULL;
+	*tilde_ptr1++ = '\0';
+	proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0);
+
+	tilde_ptr2 = strchr(param->value, '~');
+	if (!tilde_ptr2)
+		return NULL;
+	*tilde_ptr2++ = '\0';
+	acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0);
+
+	return (acceptor_right_value >= proposer_right_value) ?
+		tilde_ptr1 : tilde_ptr2;
+}
+
+static char *iscsi_check_valuelist_for_support(
+	struct iscsi_param *param,
+	char *value)
+{
+	char *tmp1 = NULL, *tmp2 = NULL;
+	char *acceptor_values = NULL, *proposer_values = NULL;
+
+	acceptor_values = param->value;
+	proposer_values = value;
+
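+	/*
+	 * Walk the proposer's comma-separated value list in order and
+	 * return the first entry that also appears in the acceptor's list.
+	 */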
+	do {
+		if (!proposer_values)
+			return NULL;
+		tmp1 = strchr(proposer_values, ',');
+		if (tmp1)
+			*tmp1 = '\0';
+		acceptor_values = param->value;
+		do {
+			if (!acceptor_values) {
+				if (tmp1)
+					*tmp1 = ',';
+				return NULL;
+			}
+			tmp2 = strchr(acceptor_values, ',');
+			if (tmp2)
+				*tmp2 = '\0';
+			if (!acceptor_values || !proposer_values) {
+				if (tmp1)
+					*tmp1 = ',';
+				if (tmp2)
+					*tmp2 = ',';
+				return NULL;
+			}
+			if (!strcmp(acceptor_values, proposer_values)) {
+				if (tmp2)
+					*tmp2 = ',';
+				goto out;
+			}
+			if (tmp2)
+				*tmp2++ = ',';
+
+			acceptor_values = tmp2;
+			if (!acceptor_values)
+				break;
+		} while (acceptor_values);
+		if (tmp1)
+			*tmp1++ = ',';
+		proposer_values = tmp1;
+	} while (proposer_values);
+
+out:
+	return proposer_values;
+}
+
+static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value)
+{
+	u8 acceptor_boolean_value = 0, proposer_boolean_value = 0;
+	char *negotiated_value = NULL;
+
+	if (IS_PSTATE_ACCEPTOR(param)) {
+		pr_err("Received key \"%s\" twice, protocol error.\n",
+				param->name);
+		return -1;
+	}
+
+	if (IS_PSTATE_REJECT(param))
+		return 0;
+
+	if (IS_TYPE_BOOL_AND(param)) {
+		if (!strcmp(value, YES))
+			proposer_boolean_value = 1;
+		if (!strcmp(param->value, YES))
+			acceptor_boolean_value = 1;
+		if (acceptor_boolean_value && proposer_boolean_value)
+			do {} while (0);
+		else {
+			if (iscsi_update_param_value(param, NO) < 0)
+				return -1;
+			if (!proposer_boolean_value)
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
+	} else if (IS_TYPE_BOOL_OR(param)) {
+		if (!strcmp(value, YES))
+			proposer_boolean_value = 1;
+		if (!strcmp(param->value, YES))
+			acceptor_boolean_value = 1;
+		if (acceptor_boolean_value || proposer_boolean_value) {
+			if (iscsi_update_param_value(param, YES) < 0)
+				return -1;
+			if (proposer_boolean_value)
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
+	} else if (IS_TYPE_NUMBER(param)) {
+		char *tmpptr, buf[11];
+		u32 acceptor_value = simple_strtoul(param->value, &tmpptr, 0);
+		u32 proposer_value = simple_strtoul(value, &tmpptr, 0);
+
+		memset(buf, 0, sizeof(buf));
+
+		if (!strcmp(param->name, MAXCONNECTIONS) ||
+		    !strcmp(param->name, MAXBURSTLENGTH) ||
+		    !strcmp(param->name, FIRSTBURSTLENGTH) ||
+		    !strcmp(param->name, MAXOUTSTANDINGR2T) ||
+		    !strcmp(param->name, DEFAULTTIME2RETAIN) ||
+		    !strcmp(param->name, ERRORRECOVERYLEVEL)) {
+			if (proposer_value > acceptor_value) {
+				sprintf(buf, "%u", acceptor_value);
+				if (iscsi_update_param_value(param,
+						&buf[0]) < 0)
+					return -1;
+			} else {
+				if (iscsi_update_param_value(param, value) < 0)
+					return -1;
+			}
+		} else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+			if (acceptor_value > proposer_value) {
+				sprintf(buf, "%u", acceptor_value);
+				if (iscsi_update_param_value(param,
+						&buf[0]) < 0)
+					return -1;
+			} else {
+				if (iscsi_update_param_value(param, value) < 0)
+					return -1;
+			}
+		} else {
+			if (iscsi_update_param_value(param, value) < 0)
+				return -1;
+		}
+
+		if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
+			SET_PSTATE_REPLY_OPTIONAL(param);
+	} else if (IS_TYPE_NUMBER_RANGE(param)) {
+		negotiated_value = iscsi_get_value_from_number_range(
+					param, value);
+		if (!negotiated_value)
+			return -1;
+		if (iscsi_update_param_value(param, negotiated_value) < 0)
+			return -1;
+	} else if (IS_TYPE_VALUE_LIST(param)) {
+		negotiated_value = iscsi_check_valuelist_for_support(
+					param, value);
+		if (!negotiated_value) {
+			pr_err("Proposer's value list \"%s\" contains"
+				" no valid values from Acceptor's value list"
+				" \"%s\".\n", value, param->value);
+			return -1;
+		}
+		if (iscsi_update_param_value(param, negotiated_value) < 0)
+			return -1;
+	} else if (IS_PHASE_DECLARATIVE(param)) {
+		if (iscsi_update_param_value(param, value) < 0)
+			return -1;
+		SET_PSTATE_REPLY_OPTIONAL(param);
+	}
+
+	return 0;
+}
+
+static int iscsi_check_proposer_state(struct iscsi_param *param, char *value)
+{
+	if (IS_PSTATE_RESPONSE_GOT(param)) {
+		pr_err("Received key \"%s\" twice, protocol error.\n",
+				param->name);
+		return -1;
+	}
+
+	if (IS_TYPE_NUMBER_RANGE(param)) {
+		u32 left_val = 0, right_val = 0, received_value = 0;
+		char *left_val_ptr = NULL, *right_val_ptr = NULL;
+		char *tilde_ptr = NULL, *tmp_ptr = NULL;
+
+		if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) {
+			if (iscsi_update_param_value(param, value) < 0)
+				return -1;
+			return 0;
+		}
+
+		tilde_ptr = strchr(value, '~');
+		if (tilde_ptr) {
+			pr_err("Illegal \"~\" in response for \"%s\".\n",
+					param->name);
+			return -1;
+		}
+		tilde_ptr = strchr(param->value, '~');
+		if (!tilde_ptr) {
+			pr_err("Unable to locate numerical range"
+				" indicator \"~\" for \"%s\".\n", param->name);
+			return -1;
+		}
+		*tilde_ptr = '\0';
+
+		left_val_ptr = param->value;
+		right_val_ptr = param->value + strlen(left_val_ptr) + 1;
+		left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+		right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+		received_value = simple_strtoul(value, &tmp_ptr, 0);
+
+		*tilde_ptr = '~';
+
+		if ((received_value < left_val) ||
+		    (received_value > right_val)) {
+			pr_err("Illegal response \"%s=%u\", value must"
+				" be between %u and %u.\n", param->name,
+				received_value, left_val, right_val);
+			return -1;
+		}
+	} else if (IS_TYPE_VALUE_LIST(param)) {
+		char *comma_ptr = NULL, *tmp_ptr = NULL;
+
+		comma_ptr = strchr(value, ',');
+		if (comma_ptr) {
+			pr_err("Illegal \",\" in response for \"%s\".\n",
+					param->name);
+			return -1;
+		}
+
+		tmp_ptr = iscsi_check_valuelist_for_support(param, value);
+		if (!tmp_ptr)
+			return -1;
+	}
+
+	if (iscsi_update_param_value(param, value) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int iscsi_check_value(struct iscsi_param *param, char *value)
+{
+	char *comma_ptr = NULL;
+
+	if (!strcmp(value, REJECT)) {
+		if (!strcmp(param->name, IFMARKINT) ||
+		    !strcmp(param->name, OFMARKINT)) {
+			/*
+			 * Reject is not fatal for [I,O]FMarkInt,  and causes
+			 * [I,O]FMarker to be reset to No. (See iSCSI v20 A.3.2)
+			 */
+			SET_PSTATE_REJECT(param);
+			return 0;
+		}
+		pr_err("Received %s=%s\n", param->name, value);
+		return -1;
+	}
+	if (!strcmp(value, IRRELEVANT)) {
+		pr_debug("Received %s=%s\n", param->name, value);
+		SET_PSTATE_IRRELEVANT(param);
+		return 0;
+	}
+	if (!strcmp(value, NOTUNDERSTOOD)) {
+		if (!IS_PSTATE_PROPOSER(param)) {
+			pr_err("Received illegal offer %s=%s\n",
+				param->name, value);
+			return -1;
+		}
+
+/* #warning FIXME: Add check for X-ExtensionKey here */
+		pr_err("Standard iSCSI key \"%s\" cannot be answered"
+			" with \"%s\", protocol error.\n", param->name, value);
+		return -1;
+	}
+
+	do {
+		comma_ptr = strchr(value, ',');
+
+		if (comma_ptr && !IS_TYPE_VALUE_LIST(param)) {
+			pr_err("Detected value seperator \",\", but"
+				" key \"%s\" does not allow a value list,"
+				" protocol error.\n", param->name);
+			return -1;
+		}
+		if (comma_ptr)
+			*comma_ptr = '\0';
+
+		if (strlen(value) > VALUE_MAXLEN) {
+			pr_err("Value for key \"%s\" exceeds %d,"
+				" protocol error.\n", param->name,
+				VALUE_MAXLEN);
+			return -1;
+		}
+
+		if (IS_TYPE_BOOL_AND(param) || IS_TYPE_BOOL_OR(param)) {
+			if (iscsi_check_boolean_value(param, value) < 0)
+				return -1;
+		} else if (IS_TYPE_NUMBER(param)) {
+			if (iscsi_check_numerical_value(param, value) < 0)
+				return -1;
+		} else if (IS_TYPE_NUMBER_RANGE(param)) {
+			if (iscsi_check_numerical_range_value(param, value) < 0)
+				return -1;
+		} else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) {
+			if (iscsi_check_string_or_list_value(param, value) < 0)
+				return -1;
+		} else {
+			pr_err("Huh? 0x%02x\n", param->type);
+			return -1;
+		}
+
+		if (comma_ptr)
+			*comma_ptr++ = ',';
+
+		value = comma_ptr;
+	} while (value);
+
+	return 0;
+}
+
+static struct iscsi_param *__iscsi_check_key(
+	char *key,
+	int sender,
+	struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param;
+
+	if (strlen(key) > KEY_MAXLEN) {
+		pr_err("Length of key name \"%s\" exceeds %d.\n",
+			key, KEY_MAXLEN);
+		return NULL;
+	}
+
+	param = iscsi_find_param_from_key(key, param_list);
+	if (!param)
+		return NULL;
+
+	if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) {
+		pr_err("Key \"%s\" may not be sent to %s,"
+			" protocol error.\n", param->name,
+			(sender & SENDER_RECEIVER) ? "target" : "initiator");
+		return NULL;
+	}
+
+	if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) {
+		pr_err("Key \"%s\" may not be sent to %s,"
+			" protocol error.\n", param->name,
+			(sender & SENDER_RECEIVER) ? "initiator" : "target");
+		return NULL;
+	}
+
+	return param;
+}
+
+static struct iscsi_param *iscsi_check_key(
+	char *key,
+	int phase,
+	int sender,
+	struct iscsi_param_list *param_list)
+{
+	struct iscsi_param *param;
+	/*
+	 * Key name length must not exceed 63 bytes. (See iSCSI v20 5.1)
+	 */
+	if (strlen(key) > KEY_MAXLEN) {
+		pr_err("Length of key name \"%s\" exceeds %d.\n",
+			key, KEY_MAXLEN);
+		return NULL;
+	}
+
+	param = iscsi_find_param_from_key(key, param_list);
+	if (!param)
+		return NULL;
+
+	if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) {
+		pr_err("Key \"%s\" may not be sent to %s,"
+			" protocol error.\n", param->name,
+			(sender & SENDER_RECEIVER) ? "target" : "initiator");
+		return NULL;
+	}
+	if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) {
+		pr_err("Key \"%s\" may not be sent to %s,"
+				" protocol error.\n", param->name,
+			(sender & SENDER_RECEIVER) ? "initiator" : "target");
+		return NULL;
+	}
+
+	if (IS_PSTATE_ACCEPTOR(param)) {
+		pr_err("Key \"%s\" received twice, protocol error.\n",
+				key);
+		return NULL;
+	}
+
+	if (!phase)
+		return param;
+
+	if (!(param->phase & phase)) {
+		pr_err("Key \"%s\" may not be negotiated during ",
+				param->name);
+		switch (phase) {
+		case PHASE_SECURITY:
+			pr_debug("Security phase.\n");
+			break;
+		case PHASE_OPERATIONAL:
+			pr_debug("Operational phase.\n");
+			break;
+		default:
+			pr_debug("Unknown phase.\n");
+		}
+		return NULL;
+	}
+
+	return param;
+}
+
+static int iscsi_enforce_integrity_rules(
+	u8 phase,
+	struct iscsi_param_list *param_list)
+{
+	char *tmpptr;
+	u8 DataSequenceInOrder = 0;
+	u8 ErrorRecoveryLevel = 0, SessionType = 0;
+	u8 IFMarker = 0, OFMarker = 0;
+	u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0;
+	u32 FirstBurstLength = 0, MaxBurstLength = 0;
+	struct iscsi_param *param = NULL;
+
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!(param->phase & phase))
+			continue;
+		if (!strcmp(param->name, SESSIONTYPE))
+			if (!strcmp(param->value, NORMAL))
+				SessionType = 1;
+		if (!strcmp(param->name, ERRORRECOVERYLEVEL))
+			ErrorRecoveryLevel = simple_strtoul(param->value,
+					&tmpptr, 0);
+		if (!strcmp(param->name, DATASEQUENCEINORDER))
+			if (!strcmp(param->value, YES))
+				DataSequenceInOrder = 1;
+		if (!strcmp(param->name, MAXBURSTLENGTH))
+			MaxBurstLength = simple_strtoul(param->value,
+					&tmpptr, 0);
+		if (!strcmp(param->name, IFMARKER))
+			if (!strcmp(param->value, YES))
+				IFMarker = 1;
+		if (!strcmp(param->name, OFMARKER))
+			if (!strcmp(param->value, YES))
+				OFMarker = 1;
+		if (!strcmp(param->name, IFMARKINT))
+			if (!strcmp(param->value, REJECT))
+				IFMarkInt_Reject = 1;
+		if (!strcmp(param->name, OFMARKINT))
+			if (!strcmp(param->value, REJECT))
+				OFMarkInt_Reject = 1;
+	}
+
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!(param->phase & phase))
+			continue;
+		if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) &&
+		     (strcmp(param->name, IFMARKER) &&
+		      strcmp(param->name, OFMARKER) &&
+		      strcmp(param->name, IFMARKINT) &&
+		      strcmp(param->name, OFMARKINT))))
+			continue;
+		if (!strcmp(param->name, MAXOUTSTANDINGR2T) &&
+		    DataSequenceInOrder && (ErrorRecoveryLevel > 0)) {
+			if (strcmp(param->value, "1")) {
+				if (iscsi_update_param_value(param, "1") < 0)
+					return -1;
+				pr_debug("Reset \"%s\" to \"%s\".\n",
+					param->name, param->value);
+			}
+		}
+		if (!strcmp(param->name, MAXCONNECTIONS) && !SessionType) {
+			if (strcmp(param->value, "1")) {
+				if (iscsi_update_param_value(param, "1") < 0)
+					return -1;
+				pr_debug("Reset \"%s\" to \"%s\".\n",
+					param->name, param->value);
+			}
+		}
+		if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+			FirstBurstLength = simple_strtoul(param->value,
+					&tmpptr, 0);
+			if (FirstBurstLength > MaxBurstLength) {
+				char tmpbuf[10];
+				memset(tmpbuf, 0, 10);
+				sprintf(tmpbuf, "%u", MaxBurstLength);
+				if (iscsi_update_param_value(param, tmpbuf))
+					return -1;
+				pr_debug("Reset \"%s\" to \"%s\".\n",
+					param->name, param->value);
+			}
+		}
+		if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) {
+			if (iscsi_update_param_value(param, NO) < 0)
+				return -1;
+			IFMarker = 0;
+			pr_debug("Reset \"%s\" to \"%s\".\n",
+					param->name, param->value);
+		}
+		if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) {
+			if (iscsi_update_param_value(param, NO) < 0)
+				return -1;
+			OFMarker = 0;
+			pr_debug("Reset \"%s\" to \"%s\".\n",
+					 param->name, param->value);
+		}
+		if (!strcmp(param->name, IFMARKINT) && !IFMarker) {
+			if (!strcmp(param->value, REJECT))
+				continue;
+			param->state &= ~PSTATE_NEGOTIATE;
+			if (iscsi_update_param_value(param, IRRELEVANT) < 0)
+				return -1;
+			pr_debug("Reset \"%s\" to \"%s\".\n",
+					param->name, param->value);
+		}
+		if (!strcmp(param->name, OFMARKINT) && !OFMarker) {
+			if (!strcmp(param->value, REJECT))
+				continue;
+			param->state &= ~PSTATE_NEGOTIATE;
+			if (iscsi_update_param_value(param, IRRELEVANT) < 0)
+				return -1;
+			pr_debug("Reset \"%s\" to \"%s\".\n",
+					param->name, param->value);
+		}
+	}
+
+	return 0;
+}
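+
+/*
+ * Worked example for the FirstBurstLength clamp above (values assumed):
+ * with a negotiated MaxBurstLength of 65536, a FirstBurstLength of
+ * 262144 is reset to "65536" before the response is encoded.
+ */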
+
+int iscsi_decode_text_input(
+	u8 phase,
+	u8 sender,
+	char *textbuf,
+	u32 length,
+	struct iscsi_param_list *param_list)
+{
+	char *tmpbuf, *start = NULL, *end = NULL;
+
+	tmpbuf = kzalloc(length + 1, GFP_KERNEL);
+	if (!tmpbuf) {
+		pr_err("Unable to allocate memory for tmpbuf.\n");
+		return -1;
+	}
+
+	memcpy(tmpbuf, textbuf, length);
+	tmpbuf[length] = '\0';
+	start = tmpbuf;
+	end = (start + length);
+
+	while (start < end) {
+		char *key, *value;
+		struct iscsi_param *param;
+
+		if (iscsi_extract_key_value(start, &key, &value) < 0) {
+			kfree(tmpbuf);
+			return -1;
+		}
+
+		pr_debug("Got key: %s=%s\n", key, value);
+
+		if (phase & PHASE_SECURITY) {
+			if (iscsi_check_for_auth_key(key) > 0) {
+				char *tmpptr = key + strlen(key);
+				*tmpptr = '=';
+				kfree(tmpbuf);
+				return 1;
+			}
+		}
+
+		param = iscsi_check_key(key, phase, sender, param_list);
+		if (!param) {
+			if (iscsi_add_notunderstood_response(key,
+					value, param_list) < 0) {
+				kfree(tmpbuf);
+				return -1;
+			}
+			start += strlen(key) + strlen(value) + 2;
+			continue;
+		}
+		if (iscsi_check_value(param, value) < 0) {
+			kfree(tmpbuf);
+			return -1;
+		}
+
+		start += strlen(key) + strlen(value) + 2;
+
+		if (IS_PSTATE_PROPOSER(param)) {
+			if (iscsi_check_proposer_state(param, value) < 0) {
+				kfree(tmpbuf);
+				return -1;
+			}
+			SET_PSTATE_RESPONSE_GOT(param);
+		} else {
+			if (iscsi_check_acceptor_state(param, value) < 0) {
+				kfree(tmpbuf);
+				return -1;
+			}
+			SET_PSTATE_ACCEPTOR(param);
+		}
+	}
+
+	kfree(tmpbuf);
+	return 0;
+}
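+
+/*
+ * Note on the input layout parsed above (example values assumed):
+ * textbuf carries NUL-terminated key=value pairs back to back, e.g.
+ *
+ *	HeaderDigest=CRC32C,None\0MaxBurstLength=262144\0
+ *
+ * which is why each iteration advances start by strlen(key) +
+ * strlen(value) + 2, covering the '=' and the trailing NUL.
+ */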
+
+int iscsi_encode_text_output(
+	u8 phase,
+	u8 sender,
+	char *textbuf,
+	u32 *length,
+	struct iscsi_param_list *param_list)
+{
+	char *output_buf = NULL;
+	struct iscsi_extra_response *er;
+	struct iscsi_param *param;
+
+	output_buf = textbuf + *length;
+
+	if (iscsi_enforce_integrity_rules(phase, param_list) < 0)
+		return -1;
+
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!(param->sender & sender))
+			continue;
+		if (IS_PSTATE_ACCEPTOR(param) &&
+		    !IS_PSTATE_RESPONSE_SENT(param) &&
+		    !IS_PSTATE_REPLY_OPTIONAL(param) &&
+		    (param->phase & phase)) {
+			*length += sprintf(output_buf, "%s=%s",
+				param->name, param->value);
+			*length += 1;
+			output_buf = textbuf + *length;
+			SET_PSTATE_RESPONSE_SENT(param);
+			pr_debug("Sending key: %s=%s\n",
+				param->name, param->value);
+			continue;
+		}
+		if (IS_PSTATE_NEGOTIATE(param) &&
+		    !IS_PSTATE_ACCEPTOR(param) &&
+		    !IS_PSTATE_PROPOSER(param) &&
+		    (param->phase & phase)) {
+			*length += sprintf(output_buf, "%s=%s",
+				param->name, param->value);
+			*length += 1;
+			output_buf = textbuf + *length;
+			SET_PSTATE_PROPOSER(param);
+			iscsi_check_proposer_for_optional_reply(param);
+			pr_debug("Sending key: %s=%s\n",
+				param->name, param->value);
+		}
+	}
+
+	list_for_each_entry(er, &param_list->extra_response_list, er_list) {
+		*length += sprintf(output_buf, "%s=%s", er->key, er->value);
+		*length += 1;
+		output_buf = textbuf + *length;
+		pr_debug("Sending key: %s=%s\n", er->key, er->value);
+	}
+	iscsi_release_extra_responses(param_list);
+
+	return 0;
+}
+
+int iscsi_check_negotiated_keys(struct iscsi_param_list *param_list)
+{
+	int ret = 0;
+	struct iscsi_param *param;
+
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (IS_PSTATE_NEGOTIATE(param) &&
+		    IS_PSTATE_PROPOSER(param) &&
+		    !IS_PSTATE_RESPONSE_GOT(param) &&
+		    !IS_PSTATE_REPLY_OPTIONAL(param) &&
+		    !IS_PHASE_DECLARATIVE(param)) {
+			pr_err("No response for proposed key \"%s\".\n",
+					param->name);
+			ret = -1;
+		}
+	}
+
+	return ret;
+}
+
+int iscsi_change_param_value(
+	char *keyvalue,
+	struct iscsi_param_list *param_list,
+	int check_key)
+{
+	char *key = NULL, *value = NULL;
+	struct iscsi_param *param;
+	int sender = 0;
+
+	if (iscsi_extract_key_value(keyvalue, &key, &value) < 0)
+		return -1;
+
+	if (!check_key) {
+		param = __iscsi_check_key(keyvalue, sender, param_list);
+		if (!param)
+			return -1;
+	} else {
+		param = iscsi_check_key(keyvalue, 0, sender, param_list);
+		if (!param)
+			return -1;
+
+		param->set_param = 1;
+		if (iscsi_check_value(param, value) < 0) {
+			param->set_param = 0;
+			return -1;
+		}
+		param->set_param = 0;
+	}
+
+	if (iscsi_update_param_value(param, value) < 0)
+		return -1;
+
+	return 0;
+}
+
+void iscsi_set_connection_parameters(
+	struct iscsi_conn_ops *ops,
+	struct iscsi_param_list *param_list)
+{
+	char *tmpptr;
+	struct iscsi_param *param;
+
+	pr_debug("---------------------------------------------------"
+			"---------------\n");
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param))
+			continue;
+		if (!strcmp(param->name, AUTHMETHOD)) {
+			pr_debug("AuthMethod:                   %s\n",
+				param->value);
+		} else if (!strcmp(param->name, HEADERDIGEST)) {
+			ops->HeaderDigest = !strcmp(param->value, CRC32C);
+			pr_debug("HeaderDigest:                 %s\n",
+				param->value);
+		} else if (!strcmp(param->name, DATADIGEST)) {
+			ops->DataDigest = !strcmp(param->value, CRC32C);
+			pr_debug("DataDigest:                   %s\n",
+				param->value);
+		} else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
+			ops->MaxRecvDataSegmentLength =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("MaxRecvDataSegmentLength:     %s\n",
+				param->value);
+		} else if (!strcmp(param->name, OFMARKER)) {
+			ops->OFMarker = !strcmp(param->value, YES);
+			pr_debug("OFMarker:                     %s\n",
+				param->value);
+		} else if (!strcmp(param->name, IFMARKER)) {
+			ops->IFMarker = !strcmp(param->value, YES);
+			pr_debug("IFMarker:                     %s\n",
+				param->value);
+		} else if (!strcmp(param->name, OFMARKINT)) {
+			ops->OFMarkInt =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("OFMarkInt:                    %s\n",
+				param->value);
+		} else if (!strcmp(param->name, IFMARKINT)) {
+			ops->IFMarkInt =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("IFMarkInt:                    %s\n",
+				param->value);
+		}
+	}
+	pr_debug("----------------------------------------------------"
+			"--------------\n");
+}
+
+void iscsi_set_session_parameters(
+	struct iscsi_sess_ops *ops,
+	struct iscsi_param_list *param_list,
+	int leading)
+{
+	char *tmpptr;
+	struct iscsi_param *param;
+
+	pr_debug("----------------------------------------------------"
+			"--------------\n");
+	list_for_each_entry(param, &param_list->param_list, p_list) {
+		if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param))
+			continue;
+		if (!strcmp(param->name, INITIATORNAME)) {
+			if (!param->value)
+				continue;
+			if (leading)
+				snprintf(ops->InitiatorName,
+						sizeof(ops->InitiatorName),
+						"%s", param->value);
+			pr_debug("InitiatorName:                %s\n",
+				param->value);
+		} else if (!strcmp(param->name, INITIATORALIAS)) {
+			if (!param->value)
+				continue;
+			snprintf(ops->InitiatorAlias,
+						sizeof(ops->InitiatorAlias),
+						"%s", param->value);
+			pr_debug("InitiatorAlias:               %s\n",
+				param->value);
+		} else if (!strcmp(param->name, TARGETNAME)) {
+			if (!param->value)
+				continue;
+			if (leading)
+				snprintf(ops->TargetName,
+						sizeof(ops->TargetName),
+						"%s", param->value);
+			pr_debug("TargetName:                   %s\n",
+				param->value);
+		} else if (!strcmp(param->name, TARGETALIAS)) {
+			if (!param->value)
+				continue;
+			snprintf(ops->TargetAlias, sizeof(ops->TargetAlias),
+					"%s", param->value);
+			pr_debug("TargetAlias:                  %s\n",
+				param->value);
+		} else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) {
+			ops->TargetPortalGroupTag =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("TargetPortalGroupTag:         %s\n",
+				param->value);
+		} else if (!strcmp(param->name, MAXCONNECTIONS)) {
+			ops->MaxConnections =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("MaxConnections:               %s\n",
+				param->value);
+		} else if (!strcmp(param->name, INITIALR2T)) {
+			ops->InitialR2T = !strcmp(param->value, YES);
+			 pr_debug("InitialR2T:                   %s\n",
+				param->value);
+		} else if (!strcmp(param->name, IMMEDIATEDATA)) {
+			ops->ImmediateData = !strcmp(param->value, YES);
+			pr_debug("ImmediateData:                %s\n",
+				param->value);
+		} else if (!strcmp(param->name, MAXBURSTLENGTH)) {
+			ops->MaxBurstLength =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("MaxBurstLength:               %s\n",
+				param->value);
+		} else if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+			ops->FirstBurstLength =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("FirstBurstLength:             %s\n",
+				param->value);
+		} else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+			ops->DefaultTime2Wait =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("DefaultTime2Wait:             %s\n",
+				param->value);
+		} else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) {
+			ops->DefaultTime2Retain =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("DefaultTime2Retain:           %s\n",
+				param->value);
+		} else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) {
+			ops->MaxOutstandingR2T =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("MaxOutstandingR2T:            %s\n",
+				param->value);
+		} else if (!strcmp(param->name, DATAPDUINORDER)) {
+			ops->DataPDUInOrder = !strcmp(param->value, YES);
+			pr_debug("DataPDUInOrder:               %s\n",
+				param->value);
+		} else if (!strcmp(param->name, DATASEQUENCEINORDER)) {
+			ops->DataSequenceInOrder = !strcmp(param->value, YES);
+			pr_debug("DataSequenceInOrder:          %s\n",
+				param->value);
+		} else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) {
+			ops->ErrorRecoveryLevel =
+				simple_strtoul(param->value, &tmpptr, 0);
+			pr_debug("ErrorRecoveryLevel:           %s\n",
+				param->value);
+		} else if (!strcmp(param->name, SESSIONTYPE)) {
+			ops->SessionType = !strcmp(param->value, DISCOVERY);
+			pr_debug("SessionType:                  %s\n",
+				param->value);
+		}
+	}
+	pr_debug("----------------------------------------------------"
+			"--------------\n");
+}
diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h
new file mode 100644
index 0000000..6a37fd6
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_parameters.h
@@ -0,0 +1,269 @@
+#ifndef ISCSI_PARAMETERS_H
+#define ISCSI_PARAMETERS_H
+
+struct iscsi_extra_response {
+	char key[64];
+	char value[32];
+	struct list_head er_list;
+} ____cacheline_aligned;
+
+struct iscsi_param {
+	char *name;
+	char *value;
+	u8 set_param;
+	u8 phase;
+	u8 scope;
+	u8 sender;
+	u8 type;
+	u8 use;
+	u16 type_range;
+	u32 state;
+	struct list_head p_list;
+} ____cacheline_aligned;
+
+extern int iscsi_login_rx_data(struct iscsi_conn *, char *, int);
+extern int iscsi_login_tx_data(struct iscsi_conn *, char *, char *, int);
+extern void iscsi_dump_conn_ops(struct iscsi_conn_ops *);
+extern void iscsi_dump_sess_ops(struct iscsi_sess_ops *);
+extern void iscsi_print_params(struct iscsi_param_list *);
+extern int iscsi_create_default_params(struct iscsi_param_list **);
+extern int iscsi_set_keys_to_negotiate(int, struct iscsi_param_list *);
+extern int iscsi_set_keys_irrelevant_for_discovery(struct iscsi_param_list *);
+extern int iscsi_copy_param_list(struct iscsi_param_list **,
+			struct iscsi_param_list *, int);
+extern int iscsi_change_param_value(char *, struct iscsi_param_list *, int);
+extern void iscsi_release_param_list(struct iscsi_param_list *);
+extern struct iscsi_param *iscsi_find_param_from_key(char *, struct iscsi_param_list *);
+extern int iscsi_extract_key_value(char *, char **, char **);
+extern int iscsi_update_param_value(struct iscsi_param *, char *);
+extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_param_list *);
+extern int iscsi_encode_text_output(u8, u8, char *, u32 *,
+			struct iscsi_param_list *);
+extern int iscsi_check_negotiated_keys(struct iscsi_param_list *);
+extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *,
+			struct iscsi_param_list *);
+extern void iscsi_set_session_parameters(struct iscsi_sess_ops *,
+			struct iscsi_param_list *, int);
+
+#define YES				"Yes"
+#define NO				"No"
+#define ALL				"All"
+#define IRRELEVANT			"Irrelevant"
+#define NONE				"None"
+#define NOTUNDERSTOOD			"NotUnderstood"
+#define REJECT				"Reject"
+
+/*
+ * The Parameter Names.
+ */
+#define AUTHMETHOD			"AuthMethod"
+#define HEADERDIGEST			"HeaderDigest"
+#define DATADIGEST			"DataDigest"
+#define MAXCONNECTIONS			"MaxConnections"
+#define SENDTARGETS			"SendTargets"
+#define TARGETNAME			"TargetName"
+#define INITIATORNAME			"InitiatorName"
+#define TARGETALIAS			"TargetAlias"
+#define INITIATORALIAS			"InitiatorAlias"
+#define TARGETADDRESS			"TargetAddress"
+#define TARGETPORTALGROUPTAG		"TargetPortalGroupTag"
+#define INITIALR2T			"InitialR2T"
+#define IMMEDIATEDATA			"ImmediateData"
+#define MAXRECVDATASEGMENTLENGTH	"MaxRecvDataSegmentLength"
+#define MAXBURSTLENGTH			"MaxBurstLength"
+#define FIRSTBURSTLENGTH		"FirstBurstLength"
+#define DEFAULTTIME2WAIT		"DefaultTime2Wait"
+#define DEFAULTTIME2RETAIN		"DefaultTime2Retain"
+#define MAXOUTSTANDINGR2T		"MaxOutstandingR2T"
+#define DATAPDUINORDER			"DataPDUInOrder"
+#define DATASEQUENCEINORDER		"DataSequenceInOrder"
+#define ERRORRECOVERYLEVEL		"ErrorRecoveryLevel"
+#define SESSIONTYPE			"SessionType"
+#define IFMARKER			"IFMarker"
+#define OFMARKER			"OFMarker"
+#define IFMARKINT			"IFMarkInt"
+#define OFMARKINT			"OFMarkInt"
+#define X_EXTENSIONKEY			"X-com.sbei.version"
+#define X_EXTENSIONKEY_CISCO_NEW	"X-com.cisco.protocol"
+#define X_EXTENSIONKEY_CISCO_OLD	"X-com.cisco.iscsi.draft"
+
+/*
+ * For AuthMethod.
+ */
+#define KRB5				"KRB5"
+#define SPKM1				"SPKM1"
+#define SPKM2				"SPKM2"
+#define SRP				"SRP"
+#define CHAP				"CHAP"
+
+/*
+ * Initial values for Parameter Negotiation.
+ */
+#define INITIAL_AUTHMETHOD			CHAP
+#define INITIAL_HEADERDIGEST			"CRC32C,None"
+#define INITIAL_DATADIGEST			"CRC32C,None"
+#define INITIAL_MAXCONNECTIONS			"1"
+#define INITIAL_SENDTARGETS			ALL
+#define INITIAL_TARGETNAME			"LIO.Target"
+#define INITIAL_INITIATORNAME			"LIO.Initiator"
+#define INITIAL_TARGETALIAS			"LIO Target"
+#define INITIAL_INITIATORALIAS			"LIO Initiator"
+#define INITIAL_TARGETADDRESS			"0.0.0.0:0000,0"
+#define INITIAL_TARGETPORTALGROUPTAG		"1"
+#define INITIAL_INITIALR2T			YES
+#define INITIAL_IMMEDIATEDATA			YES
+#define INITIAL_MAXRECVDATASEGMENTLENGTH	"8192"
+#define INITIAL_MAXBURSTLENGTH			"262144"
+#define INITIAL_FIRSTBURSTLENGTH		"65536"
+#define INITIAL_DEFAULTTIME2WAIT		"2"
+#define INITIAL_DEFAULTTIME2RETAIN		"20"
+#define INITIAL_MAXOUTSTANDINGR2T		"1"
+#define INITIAL_DATAPDUINORDER			YES
+#define INITIAL_DATASEQUENCEINORDER		YES
+#define INITIAL_ERRORRECOVERYLEVEL		"0"
+#define INITIAL_SESSIONTYPE			NORMAL
+#define INITIAL_IFMARKER			NO
+#define INITIAL_OFMARKER			NO
+#define INITIAL_IFMARKINT			"2048~65535"
+#define INITIAL_OFMARKINT			"2048~65535"
+
+/*
+ * For [Header,Data]Digests.
+ */
+#define CRC32C				"CRC32C"
+
+/*
+ * For SessionType.
+ */
+#define DISCOVERY			"Discovery"
+#define NORMAL				"Normal"
+
+/*
+ * struct iscsi_param->use
+ */
+#define USE_LEADING_ONLY		0x01
+#define USE_INITIAL_ONLY		0x02
+#define USE_ALL				0x04
+
+#define IS_USE_LEADING_ONLY(p)		((p)->use & USE_LEADING_ONLY)
+#define IS_USE_INITIAL_ONLY(p)		((p)->use & USE_INITIAL_ONLY)
+#define IS_USE_ALL(p)			((p)->use & USE_ALL)
+
+#define SET_USE_INITIAL_ONLY(p)		((p)->use |= USE_INITIAL_ONLY)
+
+/*
+ * struct iscsi_param->sender
+ */
+#define	SENDER_INITIATOR		0x01
+#define SENDER_TARGET			0x02
+#define SENDER_BOTH			0x03
+/* Used in iscsi_check_key() */
+#define SENDER_RECEIVER			0x04
+
+#define IS_SENDER_INITIATOR(p)		((p)->sender & SENDER_INITIATOR)
+#define IS_SENDER_TARGET(p)		((p)->sender & SENDER_TARGET)
+#define IS_SENDER_BOTH(p)		((p)->sender & SENDER_BOTH)
+
+/*
+ * struct iscsi_param->scope
+ */
+#define SCOPE_CONNECTION_ONLY		0x01
+#define SCOPE_SESSION_WIDE		0x02
+
+#define IS_SCOPE_CONNECTION_ONLY(p)	((p)->scope & SCOPE_CONNECTION_ONLY)
+#define IS_SCOPE_SESSION_WIDE(p)	((p)->scope & SCOPE_SESSION_WIDE)
+
+/*
+ * struct iscsi_param->phase
+ */
+#define PHASE_SECURITY			0x01
+#define PHASE_OPERATIONAL		0x02
+#define PHASE_DECLARATIVE		0x04
+#define PHASE_FFP0			0x08
+
+#define IS_PHASE_SECURITY(p)		((p)->phase & PHASE_SECURITY)
+#define IS_PHASE_OPERATIONAL(p)		((p)->phase & PHASE_OPERATIONAL)
+#define IS_PHASE_DECLARATIVE(p)		((p)->phase & PHASE_DECLARATIVE)
+#define IS_PHASE_FFP0(p)		((p)->phase & PHASE_FFP0)
+
+/*
+ * struct iscsi_param->type
+ */
+#define TYPE_BOOL_AND			0x01
+#define TYPE_BOOL_OR			0x02
+#define TYPE_NUMBER			0x04
+#define TYPE_NUMBER_RANGE		0x08
+#define TYPE_STRING			0x10
+#define TYPE_VALUE_LIST			0x20
+
+#define IS_TYPE_BOOL_AND(p)		((p)->type & TYPE_BOOL_AND)
+#define IS_TYPE_BOOL_OR(p)		((p)->type & TYPE_BOOL_OR)
+#define IS_TYPE_NUMBER(p)		((p)->type & TYPE_NUMBER)
+#define IS_TYPE_NUMBER_RANGE(p)		((p)->type & TYPE_NUMBER_RANGE)
+#define IS_TYPE_STRING(p)		((p)->type & TYPE_STRING)
+#define IS_TYPE_VALUE_LIST(p)		((p)->type & TYPE_VALUE_LIST)
+
+/*
+ * struct iscsi_param->type_range
+ */
+#define TYPERANGE_BOOL_AND		0x0001
+#define TYPERANGE_BOOL_OR		0x0002
+#define TYPERANGE_0_TO_2		0x0004
+#define TYPERANGE_0_TO_3600		0x0008
+#define TYPERANGE_0_TO_32767		0x0010
+#define TYPERANGE_0_TO_65535		0x0020
+#define TYPERANGE_1_TO_65535		0x0040
+#define TYPERANGE_2_TO_3600		0x0080
+#define TYPERANGE_512_TO_16777215	0x0100
+#define TYPERANGE_AUTH			0x0200
+#define TYPERANGE_DIGEST		0x0400
+#define TYPERANGE_ISCSINAME		0x0800
+#define TYPERANGE_MARKINT		0x1000
+#define TYPERANGE_SESSIONTYPE		0x2000
+#define TYPERANGE_TARGETADDRESS		0x4000
+#define TYPERANGE_UTF8			0x8000
+
+#define IS_TYPERANGE_0_TO_2(p)		((p)->type_range & TYPERANGE_0_TO_2)
+#define IS_TYPERANGE_0_TO_3600(p)	((p)->type_range & TYPERANGE_0_TO_3600)
+#define IS_TYPERANGE_0_TO_32767(p)	((p)->type_range & TYPERANGE_0_TO_32767)
+#define IS_TYPERANGE_0_TO_65535(p)	((p)->type_range & TYPERANGE_0_TO_65535)
+#define IS_TYPERANGE_1_TO_65535(p)	((p)->type_range & TYPERANGE_1_TO_65535)
+#define IS_TYPERANGE_2_TO_3600(p)	((p)->type_range & TYPERANGE_2_TO_3600)
+#define IS_TYPERANGE_512_TO_16777215(p)	((p)->type_range & \
+						TYPERANGE_512_TO_16777215)
+#define IS_TYPERANGE_AUTH_PARAM(p)	((p)->type_range & TYPERANGE_AUTH)
+#define IS_TYPERANGE_DIGEST_PARAM(p)	((p)->type_range & TYPERANGE_DIGEST)
+#define IS_TYPERANGE_SESSIONTYPE(p)	((p)->type_range & \
+						TYPERANGE_SESSIONTYPE)
+
+/*
+ * struct iscsi_param->state
+ */
+#define PSTATE_ACCEPTOR			0x01
+#define PSTATE_NEGOTIATE		0x02
+#define PSTATE_PROPOSER			0x04
+#define PSTATE_IRRELEVANT		0x08
+#define PSTATE_REJECT			0x10
+#define PSTATE_REPLY_OPTIONAL		0x20
+#define PSTATE_RESPONSE_GOT		0x40
+#define PSTATE_RESPONSE_SENT		0x80
+
+#define IS_PSTATE_ACCEPTOR(p)		((p)->state & PSTATE_ACCEPTOR)
+#define IS_PSTATE_NEGOTIATE(p)		((p)->state & PSTATE_NEGOTIATE)
+#define IS_PSTATE_PROPOSER(p)		((p)->state & PSTATE_PROPOSER)
+#define IS_PSTATE_IRRELEVANT(p)		((p)->state & PSTATE_IRRELEVANT)
+#define IS_PSTATE_REJECT(p)		((p)->state & PSTATE_REJECT)
+#define IS_PSTATE_REPLY_OPTIONAL(p)	((p)->state & PSTATE_REPLY_OPTIONAL)
+#define IS_PSTATE_RESPONSE_GOT(p)	((p)->state & PSTATE_RESPONSE_GOT)
+#define IS_PSTATE_RESPONSE_SENT(p)	((p)->state & PSTATE_RESPONSE_SENT)
+
+#define SET_PSTATE_ACCEPTOR(p)		((p)->state |= PSTATE_ACCEPTOR)
+#define SET_PSTATE_NEGOTIATE(p)		((p)->state |= PSTATE_NEGOTIATE)
+#define SET_PSTATE_PROPOSER(p)		((p)->state |= PSTATE_PROPOSER)
+#define SET_PSTATE_IRRELEVANT(p)	((p)->state |= PSTATE_IRRELEVANT)
+#define SET_PSTATE_REJECT(p)		((p)->state |= PSTATE_REJECT)
+#define SET_PSTATE_REPLY_OPTIONAL(p)	((p)->state |= PSTATE_REPLY_OPTIONAL)
+#define SET_PSTATE_RESPONSE_GOT(p)	((p)->state |= PSTATE_RESPONSE_GOT)
+#define SET_PSTATE_RESPONSE_SENT(p)	((p)->state |= PSTATE_RESPONSE_SENT)
+
+#endif /* ISCSI_PARAMETERS_H */
diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c
new file mode 100644
index 0000000..fc69408
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c
@@ -0,0 +1,664 @@
+/*******************************************************************************
+ * This file contains the main functions related to iSCSI
+ * DataSequenceInOrder=No and DataPDUInOrder=No.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/slab.h>
+#include <linux/random.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_seq_pdu_list.h"
+
+#define OFFLOAD_BUF_SIZE	32768
+
+void iscsit_dump_seq_list(struct iscsi_cmd *cmd)
+{
+	int i;
+	struct iscsi_seq *seq;
+
+	pr_debug("Dumping Sequence List for ITT: 0x%08x:\n",
+			cmd->init_task_tag);
+
+	for (i = 0; i < cmd->seq_count; i++) {
+		seq = &cmd->seq_list[i];
+		pr_debug("i: %d, pdu_start: %d, pdu_count: %d,"
+			" offset: %d, xfer_len: %d, seq_send_order: %d,"
+			" seq_no: %d\n", i, seq->pdu_start, seq->pdu_count,
+			seq->offset, seq->xfer_len, seq->seq_send_order,
+			seq->seq_no);
+	}
+}
+
+void iscsit_dump_pdu_list(struct iscsi_cmd *cmd)
+{
+	int i;
+	struct iscsi_pdu *pdu;
+
+	pr_debug("Dumping PDU List for ITT: 0x%08x:\n",
+			cmd->init_task_tag);
+
+	for (i = 0; i < cmd->pdu_count; i++) {
+		pdu = &cmd->pdu_list[i];
+		pr_debug("i: %d, offset: %d, length: %d,"
+			" pdu_send_order: %d, seq_no: %d\n", i, pdu->offset,
+			pdu->length, pdu->pdu_send_order, pdu->seq_no);
+	}
+}
+
+static void iscsit_ordered_seq_lists(
+	struct iscsi_cmd *cmd,
+	u8 type)
+{
+	u32 i, seq_count = 0;
+
+	for (i = 0; i < cmd->seq_count; i++) {
+		if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+			continue;
+		cmd->seq_list[i].seq_send_order = seq_count++;
+	}
+}
+
+static void iscsit_ordered_pdu_lists(
+	struct iscsi_cmd *cmd,
+	u8 type)
+{
+	u32 i, pdu_send_order = 0, seq_no = 0;
+
+	for (i = 0; i < cmd->pdu_count; i++) {
+redo:
+		if (cmd->pdu_list[i].seq_no == seq_no) {
+			cmd->pdu_list[i].pdu_send_order = pdu_send_order++;
+			continue;
+		}
+		seq_no++;
+		pdu_send_order = 0;
+		goto redo;
+	}
+}
+
+/*
+ *	Generate count random values into array[].
+ *	Bit 0x80000000 marks values that have already been generated.
+ */
+static void iscsit_create_random_array(u32 *array, u32 count)
+{
+	int i, j, k;
+
+	if (count == 1) {
+		array[0] = 0;
+		return;
+	}
+
+	for (i = 0; i < count; i++) {
+redo:
+		get_random_bytes(&j, sizeof(u32));
+		j = (1 + (int) (9999 + 1) - j) % count;
+		for (k = 0; k < i + 1; k++) {
+			j |= 0x80000000;
+			if ((array[k] & 0x80000000) && (array[k] == j))
+				goto redo;
+		}
+		array[i] = j;
+	}
+
+	for (i = 0; i < count; i++)
+		array[i] &= ~0x80000000;
+}
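+
+/*
+ * Illustrative sketch: for count == 4 the helper above is meant to fill
+ * array[] with a random arrangement of the indices 0..3:
+ *
+ *	u32 order[4];
+ *	iscsit_create_random_array(order, 4);
+ *
+ * order[] might now hold { 2, 0, 3, 1 }.
+ */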
+
+static int iscsit_randomize_pdu_lists(
+	struct iscsi_cmd *cmd,
+	u8 type)
+{
+	int i = 0;
+	u32 *array, pdu_count, seq_count = 0, seq_no = 0, seq_offset = 0;
+
+	for (pdu_count = 0; pdu_count < cmd->pdu_count; pdu_count++) {
+redo:
+		if (cmd->pdu_list[pdu_count].seq_no == seq_no) {
+			seq_count++;
+			continue;
+		}
+		array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+		if (!array) {
+			pr_err("Unable to allocate memory"
+				" for random array.\n");
+			return -1;
+		}
+		iscsit_create_random_array(array, seq_count);
+
+		for (i = 0; i < seq_count; i++)
+			cmd->pdu_list[seq_offset+i].pdu_send_order = array[i];
+
+		kfree(array);
+
+		seq_offset += seq_count;
+		seq_count = 0;
+		seq_no++;
+		goto redo;
+	}
+
+	if (seq_count) {
+		array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+		if (!array) {
+			pr_err("Unable to allocate memory for"
+				" random array.\n");
+			return -1;
+		}
+		iscsit_create_random_array(array, seq_count);
+
+		for (i = 0; i < seq_count; i++)
+			cmd->pdu_list[seq_offset+i].pdu_send_order = array[i];
+
+		kfree(array);
+	}
+
+	return 0;
+}
+
+static int iscsit_randomize_seq_lists(
+	struct iscsi_cmd *cmd,
+	u8 type)
+{
+	int i, j = 0;
+	u32 *array, seq_count = cmd->seq_count;
+
+	if ((type == PDULIST_IMMEDIATE) || (type == PDULIST_UNSOLICITED))
+		seq_count--;
+	else if (type == PDULIST_IMMEDIATE_AND_UNSOLICITED)
+		seq_count -= 2;
+
+	if (!seq_count)
+		return 0;
+
+	array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+	if (!array) {
+		pr_err("Unable to allocate memory for random array.\n");
+		return -1;
+	}
+	iscsit_create_random_array(array, seq_count);
+
+	for (i = 0; i < cmd->seq_count; i++) {
+		if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+			continue;
+		cmd->seq_list[i].seq_send_order = array[j++];
+	}
+
+	kfree(array);
+	return 0;
+}
+
+static void iscsit_determine_counts_for_list(
+	struct iscsi_cmd *cmd,
+	struct iscsi_build_list *bl,
+	u32 *seq_count,
+	u32 *pdu_count)
+{
+	int check_immediate = 0;
+	u32 burstlength = 0, offset = 0;
+	u32 unsolicited_data_length = 0;
+	struct iscsi_conn *conn = cmd->conn;
+
+	if ((bl->type == PDULIST_IMMEDIATE) ||
+	    (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+		check_immediate = 1;
+
+	if ((bl->type == PDULIST_UNSOLICITED) ||
+	    (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+		unsolicited_data_length = (cmd->data_length >
+			conn->sess->sess_ops->FirstBurstLength) ?
+			conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+
+	while (offset < cmd->data_length) {
+		*pdu_count += 1;
+
+		if (check_immediate) {
+			check_immediate = 0;
+			offset += bl->immediate_data_length;
+			*seq_count += 1;
+			if (unsolicited_data_length)
+				unsolicited_data_length -=
+					bl->immediate_data_length;
+			continue;
+		}
+		if (unsolicited_data_length > 0) {
+			if ((offset + conn->conn_ops->MaxRecvDataSegmentLength)
+					>= cmd->data_length) {
+				unsolicited_data_length -=
+					(cmd->data_length - offset);
+				offset += (cmd->data_length - offset);
+				continue;
+			}
+			if ((offset + conn->conn_ops->MaxRecvDataSegmentLength)
+					>= conn->sess->sess_ops->FirstBurstLength) {
+				unsolicited_data_length -=
+					(conn->sess->sess_ops->FirstBurstLength -
+					offset);
+				offset += (conn->sess->sess_ops->FirstBurstLength -
+					offset);
+				burstlength = 0;
+				*seq_count += 1;
+				continue;
+			}
+
+			offset += conn->conn_ops->MaxRecvDataSegmentLength;
+			unsolicited_data_length -=
+				conn->conn_ops->MaxRecvDataSegmentLength;
+			continue;
+		}
+		if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+		     cmd->data_length) {
+			offset += (cmd->data_length - offset);
+			continue;
+		}
+		if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >=
+		     conn->sess->sess_ops->MaxBurstLength) {
+			offset += (conn->sess->sess_ops->MaxBurstLength -
+					burstlength);
+			burstlength = 0;
+			*seq_count += 1;
+			continue;
+		}
+
+		burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+		offset += conn->conn_ops->MaxRecvDataSegmentLength;
+	}
+}
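+
+/*
+ * Worked example (session values assumed): with cmd->data_length=131072,
+ * MaxRecvDataSegmentLength=8192, MaxBurstLength=65536 and no immediate
+ * or unsolicited data, the walk above counts 16 PDUs across two full
+ * bursts; *seq_count is bumped once past the initial value of 1 seeded
+ * by the caller, giving 2 sequences.
+ */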
+
+
+/*
+ *	Builds a PDU and/or Sequence list, called while DataSequenceInOrder=No
+ *	and/or DataPDUInOrder=No.
+ */
+static int iscsit_build_pdu_and_seq_list(
+	struct iscsi_cmd *cmd,
+	struct iscsi_build_list *bl)
+{
+	int check_immediate = 0, datapduinorder, datasequenceinorder;
+	u32 burstlength = 0, offset = 0, i = 0;
+	u32 pdu_count = 0, seq_no = 0, unsolicited_data_length = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_pdu *pdu = cmd->pdu_list;
+	struct iscsi_seq *seq = cmd->seq_list;
+
+	datapduinorder = conn->sess->sess_ops->DataPDUInOrder;
+	datasequenceinorder = conn->sess->sess_ops->DataSequenceInOrder;
+
+	if ((bl->type == PDULIST_IMMEDIATE) ||
+	    (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+		check_immediate = 1;
+
+	if ((bl->type == PDULIST_UNSOLICITED) ||
+	    (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+		unsolicited_data_length = (cmd->data_length >
+			conn->sess->sess_ops->FirstBurstLength) ?
+			conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+
+	while (offset < cmd->data_length) {
+		pdu_count++;
+		if (!datapduinorder) {
+			pdu[i].offset = offset;
+			pdu[i].seq_no = seq_no;
+		}
+		if (!datasequenceinorder && (pdu_count == 1)) {
+			seq[seq_no].pdu_start = i;
+			seq[seq_no].seq_no = seq_no;
+			seq[seq_no].offset = offset;
+			seq[seq_no].orig_offset = offset;
+		}
+
+		if (check_immediate) {
+			check_immediate = 0;
+			if (!datapduinorder) {
+				pdu[i].type = PDUTYPE_IMMEDIATE;
+				pdu[i++].length = bl->immediate_data_length;
+			}
+			if (!datasequenceinorder) {
+				seq[seq_no].type = SEQTYPE_IMMEDIATE;
+				seq[seq_no].pdu_count = 1;
+				seq[seq_no].xfer_len =
+					bl->immediate_data_length;
+			}
+			offset += bl->immediate_data_length;
+			pdu_count = 0;
+			seq_no++;
+			if (unsolicited_data_length)
+				unsolicited_data_length -=
+					bl->immediate_data_length;
+			continue;
+		}
+		if (unsolicited_data_length > 0) {
+			if ((offset +
+			     conn->conn_ops->MaxRecvDataSegmentLength) >=
+			     cmd->data_length) {
+				if (!datapduinorder) {
+					pdu[i].type = PDUTYPE_UNSOLICITED;
+					pdu[i].length =
+						(cmd->data_length - offset);
+				}
+				if (!datasequenceinorder) {
+					seq[seq_no].type = SEQTYPE_UNSOLICITED;
+					seq[seq_no].pdu_count = pdu_count;
+					seq[seq_no].xfer_len = (burstlength +
+						(cmd->data_length - offset));
+				}
+				unsolicited_data_length -=
+						(cmd->data_length - offset);
+				offset += (cmd->data_length - offset);
+				continue;
+			}
+			if ((offset +
+			     conn->conn_ops->MaxRecvDataSegmentLength) >=
+					conn->sess->sess_ops->FirstBurstLength) {
+				if (!datapduinorder) {
+					pdu[i].type = PDUTYPE_UNSOLICITED;
+					pdu[i++].length =
+					   (conn->sess->sess_ops->FirstBurstLength -
+						offset);
+				}
+				if (!datasequenceinorder) {
+					seq[seq_no].type = SEQTYPE_UNSOLICITED;
+					seq[seq_no].pdu_count = pdu_count;
+					seq[seq_no].xfer_len = (burstlength +
+					   (conn->sess->sess_ops->FirstBurstLength -
+						offset));
+				}
+				unsolicited_data_length -=
+					(conn->sess->sess_ops->FirstBurstLength -
+						offset);
+				offset += (conn->sess->sess_ops->FirstBurstLength -
+						offset);
+				burstlength = 0;
+				pdu_count = 0;
+				seq_no++;
+				continue;
+			}
+
+			if (!datapduinorder) {
+				pdu[i].type = PDUTYPE_UNSOLICITED;
+				pdu[i++].length =
+				     conn->conn_ops->MaxRecvDataSegmentLength;
+			}
+			burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+			offset += conn->conn_ops->MaxRecvDataSegmentLength;
+			unsolicited_data_length -=
+				conn->conn_ops->MaxRecvDataSegmentLength;
+			continue;
+		}
+		if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+		     cmd->data_length) {
+			if (!datapduinorder) {
+				pdu[i].type = PDUTYPE_NORMAL;
+				pdu[i].length = (cmd->data_length - offset);
+			}
+			if (!datasequenceinorder) {
+				seq[seq_no].type = SEQTYPE_NORMAL;
+				seq[seq_no].pdu_count = pdu_count;
+				seq[seq_no].xfer_len = (burstlength +
+					(cmd->data_length - offset));
+			}
+			offset += (cmd->data_length - offset);
+			continue;
+		}
+		if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >=
+		     conn->sess->sess_ops->MaxBurstLength) {
+			if (!datapduinorder) {
+				pdu[i].type = PDUTYPE_NORMAL;
+				pdu[i++].length =
+					(conn->sess->sess_ops->MaxBurstLength -
+						burstlength);
+			}
+			if (!datasequenceinorder) {
+				seq[seq_no].type = SEQTYPE_NORMAL;
+				seq[seq_no].pdu_count = pdu_count;
+				seq[seq_no].xfer_len = (burstlength +
+					(conn->sess->sess_ops->MaxBurstLength -
+					burstlength));
+			}
+			offset += (conn->sess->sess_ops->MaxBurstLength -
+					burstlength);
+			burstlength = 0;
+			pdu_count = 0;
+			seq_no++;
+			continue;
+		}
+
+		if (!datapduinorder) {
+			pdu[i].type = PDUTYPE_NORMAL;
+			pdu[i++].length =
+				conn->conn_ops->MaxRecvDataSegmentLength;
+		}
+		burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+		offset += conn->conn_ops->MaxRecvDataSegmentLength;
+	}
+
+	if (!datasequenceinorder) {
+		if (bl->data_direction & ISCSI_PDU_WRITE) {
+			if (bl->randomize & RANDOM_R2T_OFFSETS) {
+				if (iscsit_randomize_seq_lists(cmd, bl->type)
+						< 0)
+					return -1;
+			} else
+				iscsit_ordered_seq_lists(cmd, bl->type);
+		} else if (bl->data_direction & ISCSI_PDU_READ) {
+			if (bl->randomize & RANDOM_DATAIN_SEQ_OFFSETS) {
+				if (iscsit_randomize_seq_lists(cmd, bl->type)
+						< 0)
+					return -1;
+			} else
+				iscsit_ordered_seq_lists(cmd, bl->type);
+		}
+#if 0
+		iscsit_dump_seq_list(cmd);
+#endif
+	}
+	if (!datapduinorder) {
+		if (bl->data_direction & ISCSI_PDU_WRITE) {
+			if (bl->randomize & RANDOM_DATAOUT_PDU_OFFSETS) {
+				if (iscsit_randomize_pdu_lists(cmd, bl->type)
+						< 0)
+					return -1;
+			} else
+				iscsit_ordered_pdu_lists(cmd, bl->type);
+		} else if (bl->data_direction & ISCSI_PDU_READ) {
+			if (bl->randomize & RANDOM_DATAIN_PDU_OFFSETS) {
+				if (iscsit_randomize_pdu_lists(cmd, bl->type)
+						< 0)
+					return -1;
+			} else
+				iscsit_ordered_pdu_lists(cmd, bl->type);
+		}
+#if 0
+		iscsit_dump_pdu_list(cmd);
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ *	Only called while DataSequenceInOrder=No or DataPDUInOrder=No.
+ */
+int iscsit_do_build_list(
+	struct iscsi_cmd *cmd,
+	struct iscsi_build_list *bl)
+{
+	u32 pdu_count = 0, seq_count = 1;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_pdu *pdu = NULL;
+	struct iscsi_seq *seq = NULL;
+
+	iscsit_determine_counts_for_list(cmd, bl, &seq_count, &pdu_count);
+
+	if (!conn->sess->sess_ops->DataSequenceInOrder) {
+		seq = kzalloc(seq_count * sizeof(struct iscsi_seq), GFP_ATOMIC);
+		if (!seq) {
+			pr_err("Unable to allocate struct iscsi_seq list\n");
+			return -1;
+		}
+		cmd->seq_list = seq;
+		cmd->seq_count = seq_count;
+	}
+
+	if (!conn->sess->sess_ops->DataPDUInOrder) {
+		pdu = kzalloc(pdu_count * sizeof(struct iscsi_pdu), GFP_ATOMIC);
+		if (!pdu) {
+			pr_err("Unable to allocate struct iscsi_pdu list.\n");
+			kfree(seq);
+			return -1;
+		}
+		cmd->pdu_list = pdu;
+		cmd->pdu_count = pdu_count;
+	}
+
+	return iscsit_build_pdu_and_seq_list(cmd, bl);
+}
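+
+/*
+ * Minimal usage sketch (hypothetical caller, field values assumed):
+ * build a randomized DataIN list for a READ with no immediate data.
+ *
+ *	struct iscsi_build_list bl = {
+ *		.data_direction		= ISCSI_PDU_READ,
+ *		.randomize		= RANDOM_DATAIN_PDU_OFFSETS,
+ *		.type			= PDULIST_NORMAL,
+ *		.immediate_data_length	= 0,
+ *	};
+ *
+ *	if (iscsit_do_build_list(cmd, &bl) < 0)
+ *		return -1;
+ */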
+
+struct iscsi_pdu *iscsit_get_pdu_holder(
+	struct iscsi_cmd *cmd,
+	u32 offset,
+	u32 length)
+{
+	u32 i;
+	struct iscsi_pdu *pdu = NULL;
+
+	if (!cmd->pdu_list) {
+		pr_err("struct iscsi_cmd->pdu_list is NULL!\n");
+		return NULL;
+	}
+
+	pdu = &cmd->pdu_list[0];
+
+	for (i = 0; i < cmd->pdu_count; i++)
+		if ((pdu[i].offset == offset) && (pdu[i].length == length))
+			return &pdu[i];
+
+	pr_err("Unable to locate PDU holder for ITT: 0x%08x, Offset:"
+		" %u, Length: %u\n", cmd->init_task_tag, offset, length);
+	return NULL;
+}
+
+struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(
+	struct iscsi_cmd *cmd,
+	struct iscsi_seq *seq)
+{
+	u32 i;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_pdu *pdu = NULL;
+
+	if (!cmd->pdu_list) {
+		pr_err("struct iscsi_cmd->pdu_list is NULL!\n");
+		return NULL;
+	}
+
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+redo:
+		pdu = &cmd->pdu_list[cmd->pdu_start];
+
+		for (i = 0; pdu[i].seq_no != cmd->seq_no; i++) {
+#if 0
+			pr_debug("pdu[i].seq_no: %d, pdu[i].pdu"
+				"_send_order: %d, pdu[i].offset: %d,"
+				" pdu[i].length: %d\n", pdu[i].seq_no,
+				pdu[i].pdu_send_order, pdu[i].offset,
+				pdu[i].length);
+#endif
+			if (pdu[i].pdu_send_order == cmd->pdu_send_order) {
+				cmd->pdu_send_order++;
+				return &pdu[i];
+			}
+		}
+
+		cmd->pdu_start += cmd->pdu_send_order;
+		cmd->pdu_send_order = 0;
+		cmd->seq_no++;
+
+		if (cmd->pdu_start < cmd->pdu_count)
+			goto redo;
+
+		pr_err("Command ITT: 0x%08x unable to locate"
+			" struct iscsi_pdu for cmd->pdu_send_order: %u.\n",
+			cmd->init_task_tag, cmd->pdu_send_order);
+		return NULL;
+	} else {
+		if (!seq) {
+			pr_err("struct iscsi_seq is NULL!\n");
+			return NULL;
+		}
+#if 0
+		pr_debug("seq->pdu_start: %d, seq->pdu_count: %d,"
+			" seq->seq_no: %d\n", seq->pdu_start, seq->pdu_count,
+			seq->seq_no);
+#endif
+		pdu = &cmd->pdu_list[seq->pdu_start];
+
+		if (seq->pdu_send_order == seq->pdu_count) {
+			pr_err("Command ITT: 0x%08x seq->pdu_send"
+				"_order: %u equals seq->pdu_count: %u\n",
+				cmd->init_task_tag, seq->pdu_send_order,
+				seq->pdu_count);
+			return NULL;
+		}
+
+		for (i = 0; i < seq->pdu_count; i++) {
+			if (pdu[i].pdu_send_order == seq->pdu_send_order) {
+				seq->pdu_send_order++;
+				return &pdu[i];
+			}
+		}
+
+		pr_err("Command ITT: 0x%08x unable to locate iscsi"
+			"_pdu_t for seq->pdu_send_order: %u.\n",
+			cmd->init_task_tag, seq->pdu_send_order);
+		return NULL;
+	}
+}
+
+struct iscsi_seq *iscsit_get_seq_holder(
+	struct iscsi_cmd *cmd,
+	u32 offset,
+	u32 length)
+{
+	u32 i;
+
+	if (!cmd->seq_list) {
+		pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+		return NULL;
+	}
+
+	for (i = 0; i < cmd->seq_count; i++) {
+#if 0
+		pr_debug("seq_list[i].orig_offset: %d, seq_list[i]."
+			"xfer_len: %d, seq_list[i].seq_no %u\n",
+			cmd->seq_list[i].orig_offset, cmd->seq_list[i].xfer_len,
+			cmd->seq_list[i].seq_no);
+#endif
+		if ((cmd->seq_list[i].orig_offset +
+				cmd->seq_list[i].xfer_len) >=
+				(offset + length))
+			return &cmd->seq_list[i];
+	}
+
+	pr_err("Unable to locate Sequence holder for ITT: 0x%08x,"
+		" Offset: %u, Length: %u\n", cmd->init_task_tag, offset,
+		length);
+	return NULL;
+}
diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h
new file mode 100644
index 0000000..0d52a10
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h
@@ -0,0 +1,86 @@
+#ifndef ISCSI_SEQ_AND_PDU_LIST_H
+#define ISCSI_SEQ_AND_PDU_LIST_H
+
+/* struct iscsi_pdu->status */
+#define DATAOUT_PDU_SENT			1
+
+/* struct iscsi_seq->type */
+#define SEQTYPE_IMMEDIATE			1
+#define SEQTYPE_UNSOLICITED			2
+#define SEQTYPE_NORMAL				3
+
+/* struct iscsi_seq->status */
+#define DATAOUT_SEQUENCE_GOT_R2T		1
+#define DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY 2
+#define DATAOUT_SEQUENCE_COMPLETE		3
+
+/* iscsit_determine_counts_for_list() type */
+#define PDULIST_NORMAL				1
+#define PDULIST_IMMEDIATE			2
+#define PDULIST_UNSOLICITED			3
+#define PDULIST_IMMEDIATE_AND_UNSOLICITED	4
+
+/* struct iscsi_pdu->type */
+#define PDUTYPE_IMMEDIATE			1
+#define PDUTYPE_UNSOLICITED			2
+#define PDUTYPE_NORMAL				3
+
+/* struct iscsi_pdu->status */
+#define ISCSI_PDU_NOT_RECEIVED			0
+#define ISCSI_PDU_RECEIVED_OK			1
+#define ISCSI_PDU_CRC_FAILED			2
+#define ISCSI_PDU_TIMED_OUT			3
+
+/* struct iscsi_build_list->randomize */
+#define RANDOM_DATAIN_PDU_OFFSETS		0x01
+#define RANDOM_DATAIN_SEQ_OFFSETS		0x02
+#define RANDOM_DATAOUT_PDU_OFFSETS		0x04
+#define RANDOM_R2T_OFFSETS			0x08
+
+/* struct iscsi_build_list->data_direction */
+#define ISCSI_PDU_READ				0x01
+#define ISCSI_PDU_WRITE				0x02
+
+struct iscsi_build_list {
+	int		data_direction;
+	int		randomize;
+	int		type;
+	int		immediate_data_length;
+};
+
+struct iscsi_pdu {
+	int		status;
+	int		type;
+	u8		flags;
+	u32		data_sn;
+	u32		length;
+	u32		offset;
+	u32		pdu_send_order;
+	u32		seq_no;
+} ____cacheline_aligned;
+
+struct iscsi_seq {
+	int		sent;
+	int		status;
+	int		type;
+	u32		data_sn;
+	u32		first_datasn;
+	u32		last_datasn;
+	u32		next_burst_len;
+	u32		pdu_start;
+	u32		pdu_count;
+	u32		offset;
+	u32		orig_offset;
+	u32		pdu_send_order;
+	u32		r2t_sn;
+	u32		seq_send_order;
+	u32		seq_no;
+	u32		xfer_len;
+} ____cacheline_aligned;
+
+extern int iscsit_do_build_list(struct iscsi_cmd *, struct iscsi_build_list *);
+extern struct iscsi_pdu *iscsit_get_pdu_holder(struct iscsi_cmd *, u32, u32);
+extern struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(struct iscsi_cmd *, struct iscsi_seq *);
+extern struct iscsi_seq *iscsit_get_seq_holder(struct iscsi_cmd *, u32, u32);
+
+#endif /* ISCSI_SEQ_AND_PDU_LIST_H */
diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c
new file mode 100644
index 0000000..bbdbe93
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_stat.c
@@ -0,0 +1,950 @@
+/*******************************************************************************
+ * Modern ConfigFS group-context-specific iSCSI statistics, based on the
+ * original iscsi_target_mib.c code
+ *
+ * Copyright (c) 2011 Rising Tide Systems
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/configfs.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/configfs_macros.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_stat.h"
+
+#ifndef INITIAL_JIFFIES
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#endif
+
+/* Instance Attributes Table */
+#define ISCSI_INST_NUM_NODES		1
+#define ISCSI_INST_DESCR		"Storage Engine Target"
+#define ISCSI_INST_LAST_FAILURE_TYPE	0
+#define ISCSI_DISCONTINUITY_TIME	0
+
+#define ISCSI_NODE_INDEX		1
+
+#define ISPRINT(a)   (((a) >= ' ') && ((a) <= '~'))
+
+/****************************************************************************
+ * iSCSI MIB Tables
+ ****************************************************************************/
+/*
+ * Instance Attributes Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_instance, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_INSTANCE_ATTR(_name, _mode)			\
+static struct iscsi_stat_instance_attribute			\
+			iscsi_stat_instance_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,				\
+	iscsi_stat_instance_show_attr_##_name,			\
+	iscsi_stat_instance_store_attr_##_name);
+
+#define ISCSI_STAT_INSTANCE_ATTR_RO(_name)			\
+static struct iscsi_stat_instance_attribute			\
+			iscsi_stat_instance_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,				\
+	iscsi_stat_instance_show_attr_##_name);
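+
+/*
+ * Expansion sketch: ISCSI_STAT_INSTANCE_ATTR_RO(inst) becomes roughly
+ *
+ *	static struct iscsi_stat_instance_attribute iscsi_stat_instance_inst =
+ *		__CONFIGFS_EATTR_RO(inst,
+ *		iscsi_stat_instance_show_attr_inst);
+ */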
+
+static ssize_t iscsi_stat_instance_show_attr_inst(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_instance_show_attr_min_ver(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(min_ver);
+
+static ssize_t iscsi_stat_instance_show_attr_max_ver(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(max_ver);
+
+static ssize_t iscsi_stat_instance_show_attr_portals(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_num_tpg_nps);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(portals);
+
+static ssize_t iscsi_stat_instance_show_attr_nodes(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_INST_NUM_NODES);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(nodes);
+
+static ssize_t iscsi_stat_instance_show_attr_sessions(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_nsessions);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(sessions);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_sess(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+	u32 sess_err_count;
+
+	spin_lock_bh(&sess_err->lock);
+	sess_err_count = (sess_err->digest_errors +
+			  sess_err->cxn_timeout_errors +
+			  sess_err->pdu_format_errors);
+	spin_unlock_bh(&sess_err->lock);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", sess_err_count);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_sess);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_type(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+	return snprintf(page, PAGE_SIZE, "%u\n",
+			sess_err->last_sess_failure_type);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_type);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_rem_name(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+	return snprintf(page, PAGE_SIZE, "%s\n",
+			sess_err->last_sess_fail_rem_name[0] ?
+			sess_err->last_sess_fail_rem_name : NONE);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_rem_name);
+
+static ssize_t iscsi_stat_instance_show_attr_disc_time(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DISCONTINUITY_TIME);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(disc_time);
+
+static ssize_t iscsi_stat_instance_show_attr_description(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%s\n", ISCSI_INST_DESCR);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(description);
+
+static ssize_t iscsi_stat_instance_show_attr_vendor(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n");
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(vendor);
+
+static ssize_t iscsi_stat_instance_show_attr_version(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%s\n", ISCSIT_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(version);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_instance, iscsi_wwn_stat_grps,
+		iscsi_instance_group);
+
+static struct configfs_attribute *iscsi_stat_instance_attrs[] = {
+	&iscsi_stat_instance_inst.attr,
+	&iscsi_stat_instance_min_ver.attr,
+	&iscsi_stat_instance_max_ver.attr,
+	&iscsi_stat_instance_portals.attr,
+	&iscsi_stat_instance_nodes.attr,
+	&iscsi_stat_instance_sessions.attr,
+	&iscsi_stat_instance_fail_sess.attr,
+	&iscsi_stat_instance_fail_type.attr,
+	&iscsi_stat_instance_fail_rem_name.attr,
+	&iscsi_stat_instance_disc_time.attr,
+	&iscsi_stat_instance_description.attr,
+	&iscsi_stat_instance_vendor.attr,
+	&iscsi_stat_instance_version.attr,
+	NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_instance_item_ops = {
+	.show_attribute		= iscsi_stat_instance_attr_show,
+	.store_attribute	= iscsi_stat_instance_attr_store,
+};
+
+struct config_item_type iscsi_stat_instance_cit = {
+	.ct_item_ops		= &iscsi_stat_instance_item_ops,
+	.ct_attrs		= iscsi_stat_instance_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Instance Session Failure Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_sess_err, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_SESS_ERR_ATTR(_name, _mode)			\
+static struct iscsi_stat_sess_err_attribute			\
+			iscsi_stat_sess_err_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,				\
+	iscsi_stat_sess_err_show_attr_##_name,			\
+	iscsi_stat_sess_err_store_attr_##_name);
+
+#define ISCSI_STAT_SESS_ERR_ATTR_RO(_name)			\
+static struct iscsi_stat_sess_err_attribute			\
+			iscsi_stat_sess_err_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,				\
+	iscsi_stat_sess_err_show_attr_##_name);
+
+static ssize_t iscsi_stat_sess_err_show_attr_inst(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_sess_err_show_attr_digest_errors(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", sess_err->digest_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(digest_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_cxn_errors(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", sess_err->cxn_timeout_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(cxn_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_format_errors(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", sess_err->pdu_format_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(format_errors);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_sess_err, iscsi_wwn_stat_grps,
+		iscsi_sess_err_group);
+
+static struct configfs_attribute *iscsi_stat_sess_err_attrs[] = {
+	&iscsi_stat_sess_err_inst.attr,
+	&iscsi_stat_sess_err_digest_errors.attr,
+	&iscsi_stat_sess_err_cxn_errors.attr,
+	&iscsi_stat_sess_err_format_errors.attr,
+	NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_err_item_ops = {
+	.show_attribute		= iscsi_stat_sess_err_attr_show,
+	.store_attribute	= iscsi_stat_sess_err_attr_store,
+};
+
+struct config_item_type iscsi_stat_sess_err_cit = {
+	.ct_item_ops		= &iscsi_stat_sess_err_item_ops,
+	.ct_attrs		= iscsi_stat_sess_err_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Target Attributes Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_TGT_ATTR(_name, _mode)			\
+static struct iscsi_stat_tgt_attr_attribute			\
+			iscsi_stat_tgt_attr_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,				\
+	iscsi_stat_tgt_attr_show_attr_##_name,			\
+	iscsi_stat_tgt_attr_store_attr_##_name);
+
+#define ISCSI_STAT_TGT_ATTR_RO(_name)				\
+static struct iscsi_stat_tgt_attr_attribute			\
+			iscsi_stat_tgt_attr_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,				\
+	iscsi_stat_tgt_attr_show_attr_##_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_inst(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_TGT_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_indx(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_TGT_ATTR_RO(indx);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_login_fails(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	u32 fail_count;
+
+	spin_lock(&lstat->lock);
+	fail_count = (lstat->redirects + lstat->authorize_fails +
+			lstat->authenticate_fails + lstat->negotiate_fails +
+			lstat->other_fails);
+	spin_unlock(&lstat->lock);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", fail_count);
+}
+ISCSI_STAT_TGT_ATTR_RO(login_fails);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_time(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	u32 last_fail_time;
+
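+	/*
+	 * Report the failure time as hundredths of a second since boot
+	 * (SNMP TimeTicks): (jiffies - INITIAL_JIFFIES) * 100 / HZ.
+	 */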
+	spin_lock(&lstat->lock);
+	last_fail_time = lstat->last_fail_time ?
+			(u32)(((u32)lstat->last_fail_time -
+				INITIAL_JIFFIES) * 100 / HZ) : 0;
+	spin_unlock(&lstat->lock);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", last_fail_time);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_time);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_type(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	u32 last_fail_type;
+
+	spin_lock(&lstat->lock);
+	last_fail_type = lstat->last_fail_type;
+	spin_unlock(&lstat->lock);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", last_fail_type);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_type);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_name(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	unsigned char buf[224];
+
+	spin_lock(&lstat->lock);
+	snprintf(buf, 224, "%s", lstat->last_intr_fail_name[0] ?
+				lstat->last_intr_fail_name : NONE);
+	spin_unlock(&lstat->lock);
+
+	return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr_type(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+			struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	unsigned char buf[8];
+
+	spin_lock(&lstat->lock);
+	snprintf(buf, 8, "%s", (lstat->last_intr_fail_ip_family == AF_INET6) ?
+				"ipv6" : "ipv4");
+	spin_unlock(&lstat->lock);
+
+	return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr_type);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+			struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	unsigned char buf[32];
+
+	spin_lock(&lstat->lock);
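+	/* Bracket IPv6 addresses, matching the usual [addr]:port notation. */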
+	if (lstat->last_intr_fail_ip_family == AF_INET6)
+		snprintf(buf, 32, "[%s]", lstat->last_intr_fail_ip_addr);
+	else
+		snprintf(buf, 32, "%s", lstat->last_intr_fail_ip_addr);
+	spin_unlock(&lstat->lock);
+
+	return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps,
+		iscsi_tgt_attr_group);
+
+static struct configfs_attribute *iscsi_stat_tgt_attr_attrs[] = {
+	&iscsi_stat_tgt_attr_inst.attr,
+	&iscsi_stat_tgt_attr_indx.attr,
+	&iscsi_stat_tgt_attr_login_fails.attr,
+	&iscsi_stat_tgt_attr_last_fail_time.attr,
+	&iscsi_stat_tgt_attr_last_fail_type.attr,
+	&iscsi_stat_tgt_attr_fail_intr_name.attr,
+	&iscsi_stat_tgt_attr_fail_intr_addr_type.attr,
+	&iscsi_stat_tgt_attr_fail_intr_addr.attr,
+	NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_tgt_attr_item_ops = {
+	.show_attribute		= iscsi_stat_tgt_attr_attr_show,
+	.store_attribute	= iscsi_stat_tgt_attr_attr_store,
+};
+
+struct config_item_type iscsi_stat_tgt_attr_cit = {
+	.ct_item_ops		= &iscsi_stat_tgt_attr_item_ops,
+	.ct_attrs		= iscsi_stat_tgt_attr_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Target Login Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_login, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_LOGIN(_name, _mode)				\
+static struct iscsi_stat_login_attribute			\
+			iscsi_stat_login_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,				\
+	iscsi_stat_login_show_attr_##_name,			\
+	iscsi_stat_login_store_attr_##_name);
+
+#define ISCSI_STAT_LOGIN_RO(_name)				\
+static struct iscsi_stat_login_attribute			\
+			iscsi_stat_login_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,				\
+	iscsi_stat_login_show_attr_##_name);
+
+static ssize_t iscsi_stat_login_show_attr_inst(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_LOGIN_RO(inst);
+
+static ssize_t iscsi_stat_login_show_attr_indx(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_LOGIN_RO(indx);
+
+static ssize_t iscsi_stat_login_show_attr_accepts(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	ssize_t ret;
+
+	spin_lock(&lstat->lock);
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->accepts);
+	spin_unlock(&lstat->lock);
+
+	return ret;
+}
+ISCSI_STAT_LOGIN_RO(accepts);
+
+static ssize_t iscsi_stat_login_show_attr_other_fails(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	ssize_t ret;
+
+	spin_lock(&lstat->lock);
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->other_fails);
+	spin_unlock(&lstat->lock);
+
+	return ret;
+}
+ISCSI_STAT_LOGIN_RO(other_fails);
+
+static ssize_t iscsi_stat_login_show_attr_redirects(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	ssize_t ret;
+
+	spin_lock(&lstat->lock);
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->redirects);
+	spin_unlock(&lstat->lock);
+
+	return ret;
+}
+ISCSI_STAT_LOGIN_RO(redirects);
+
+static ssize_t iscsi_stat_login_show_attr_authorize_fails(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	ssize_t ret;
+
+	spin_lock(&lstat->lock);
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authorize_fails);
+	spin_unlock(&lstat->lock);
+
+	return ret;
+}
+ISCSI_STAT_LOGIN_RO(authorize_fails);
+
+static ssize_t iscsi_stat_login_show_attr_authenticate_fails(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	ssize_t ret;
+
+	spin_lock(&lstat->lock);
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authenticate_fails);
+	spin_unlock(&lstat->lock);
+
+	return ret;
+}
+ISCSI_STAT_LOGIN_RO(authenticate_fails);
+
+static ssize_t iscsi_stat_login_show_attr_negotiate_fails(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+				struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_login_stats *lstat = &tiqn->login_stats;
+	ssize_t ret;
+
+	spin_lock(&lstat->lock);
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->negotiate_fails);
+	spin_unlock(&lstat->lock);
+
+	return ret;
+}
+ISCSI_STAT_LOGIN_RO(negotiate_fails);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_login, iscsi_wwn_stat_grps,
+		iscsi_login_stats_group);
+
+static struct configfs_attribute *iscsi_stat_login_stats_attrs[] = {
+	&iscsi_stat_login_inst.attr,
+	&iscsi_stat_login_indx.attr,
+	&iscsi_stat_login_accepts.attr,
+	&iscsi_stat_login_other_fails.attr,
+	&iscsi_stat_login_redirects.attr,
+	&iscsi_stat_login_authorize_fails.attr,
+	&iscsi_stat_login_authenticate_fails.attr,
+	&iscsi_stat_login_negotiate_fails.attr,
+	NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_login_stats_item_ops = {
+	.show_attribute		= iscsi_stat_login_attr_show,
+	.store_attribute	= iscsi_stat_login_attr_store,
+};
+
+struct config_item_type iscsi_stat_login_cit = {
+	.ct_item_ops		= &iscsi_stat_login_stats_item_ops,
+	.ct_attrs		= iscsi_stat_login_stats_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Target Logout Stats Table
+ */
+
+CONFIGFS_EATTR_STRUCT(iscsi_stat_logout, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_LOGOUT(_name, _mode)				\
+static struct iscsi_stat_logout_attribute			\
+			iscsi_stat_logout_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,				\
+	iscsi_stat_logout_show_attr_##_name,			\
+	iscsi_stat_logout_store_attr_##_name);
+
+#define ISCSI_STAT_LOGOUT_RO(_name)				\
+static struct iscsi_stat_logout_attribute			\
+			iscsi_stat_logout_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,				\
+	iscsi_stat_logout_show_attr_##_name);
+
+static ssize_t iscsi_stat_logout_show_attr_inst(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+			struct iscsi_tiqn, tiqn_stat_grps);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_LOGOUT_RO(inst);
+
+static ssize_t iscsi_stat_logout_show_attr_indx(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_LOGOUT_RO(indx);
+
+static ssize_t iscsi_stat_logout_show_attr_normal_logouts(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+			struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_logout_stats *lstats = &tiqn->logout_stats;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", lstats->normal_logouts);
+}
+ISCSI_STAT_LOGOUT_RO(normal_logouts);
+
+static ssize_t iscsi_stat_logout_show_attr_abnormal_logouts(
+	struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+	struct iscsi_tiqn *tiqn = container_of(igrps,
+			struct iscsi_tiqn, tiqn_stat_grps);
+	struct iscsi_logout_stats *lstats = &tiqn->logout_stats;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", lstats->abnormal_logouts);
+}
+ISCSI_STAT_LOGOUT_RO(abnormal_logouts);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_logout, iscsi_wwn_stat_grps,
+		iscsi_logout_stats_group);
+
+static struct configfs_attribute *iscsi_stat_logout_stats_attrs[] = {
+	&iscsi_stat_logout_inst.attr,
+	&iscsi_stat_logout_indx.attr,
+	&iscsi_stat_logout_normal_logouts.attr,
+	&iscsi_stat_logout_abnormal_logouts.attr,
+	NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_logout_stats_item_ops = {
+	.show_attribute		= iscsi_stat_logout_attr_show,
+	.store_attribute	= iscsi_stat_logout_attr_store,
+};
+
+struct config_item_type iscsi_stat_logout_cit = {
+	.ct_item_ops		= &iscsi_stat_logout_stats_item_ops,
+	.ct_attrs		= iscsi_stat_logout_stats_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Session Stats Table
+ */
+
+CONFIGFS_EATTR_STRUCT(iscsi_stat_sess, iscsi_node_stat_grps);
+#define ISCSI_STAT_SESS(_name, _mode)				\
+static struct iscsi_stat_sess_attribute				\
+			iscsi_stat_sess_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,				\
+	iscsi_stat_sess_show_attr_##_name,			\
+	iscsi_stat_sess_store_attr_##_name);
+
+#define ISCSI_STAT_SESS_RO(_name)				\
+static struct iscsi_stat_sess_attribute				\
+			iscsi_stat_sess_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,				\
+	iscsi_stat_sess_show_attr_##_name);
+
+static ssize_t iscsi_stat_sess_show_attr_inst(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_wwn *wwn = acl->se_node_acl.se_tpg->se_tpg_wwn;
+	struct iscsi_tiqn *tiqn = container_of(wwn,
+			struct iscsi_tiqn, tiqn_wwn);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_SESS_RO(inst);
+
+static ssize_t iscsi_stat_sess_show_attr_node(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
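+	/*
+	 * SessionType=1 marks a discovery session, which is not bound to
+	 * a target node, so report 0 instead of ISCSI_NODE_INDEX.
+	 */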
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%u\n",
+				sess->sess_ops->SessionType ? 0 : ISCSI_NODE_INDEX);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(node);
+
+static ssize_t iscsi_stat_sess_show_attr_indx(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%u\n",
+					sess->session_index);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(indx);
+
+static ssize_t iscsi_stat_sess_show_attr_cmd_pdus(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(cmd_pdus);
+
+static ssize_t iscsi_stat_sess_show_attr_rsp_pdus(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(rsp_pdus);
+
+static ssize_t iscsi_stat_sess_show_attr_txdata_octs(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%llu\n",
+				(unsigned long long)sess->tx_data_octets);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(txdata_octs);
+
+static ssize_t iscsi_stat_sess_show_attr_rxdata_octs(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%llu\n",
+				(unsigned long long)sess->rx_data_octets);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(rxdata_octs);
+
+static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%u\n",
+					sess->conn_digest_errors);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(conn_digest_errors);
+
+static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors(
+	struct iscsi_node_stat_grps *igrps, char *page)
+{
+	struct iscsi_node_acl *acl = container_of(igrps,
+			struct iscsi_node_acl, node_stat_grps);
+	struct se_node_acl *se_nacl = &acl->se_node_acl;
+	struct iscsi_session *sess;
+	struct se_session *se_sess;
+	ssize_t ret = 0;
+
+	spin_lock_bh(&se_nacl->nacl_sess_lock);
+	se_sess = se_nacl->nacl_sess;
+	if (se_sess) {
+		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+		if (sess)
+			ret = snprintf(page, PAGE_SIZE, "%u\n",
+					sess->conn_timeout_errors);
+	}
+	spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+	return ret;
+}
+ISCSI_STAT_SESS_RO(conn_timeout_errors);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_sess, iscsi_node_stat_grps,
+		iscsi_sess_stats_group);
+
+static struct configfs_attribute *iscsi_stat_sess_stats_attrs[] = {
+	&iscsi_stat_sess_inst.attr,
+	&iscsi_stat_sess_node.attr,
+	&iscsi_stat_sess_indx.attr,
+	&iscsi_stat_sess_cmd_pdus.attr,
+	&iscsi_stat_sess_rsp_pdus.attr,
+	&iscsi_stat_sess_txdata_octs.attr,
+	&iscsi_stat_sess_rxdata_octs.attr,
+	&iscsi_stat_sess_conn_digest_errors.attr,
+	&iscsi_stat_sess_conn_timeout_errors.attr,
+	NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_stats_item_ops = {
+	.show_attribute		= iscsi_stat_sess_attr_show,
+	.store_attribute	= iscsi_stat_sess_attr_store,
+};
+
+struct config_item_type iscsi_stat_sess_cit = {
+	.ct_item_ops		= &iscsi_stat_sess_stats_item_ops,
+	.ct_attrs		= iscsi_stat_sess_stats_attrs,
+	.ct_owner		= THIS_MODULE,
+};
diff --git a/drivers/target/iscsi/iscsi_target_stat.h b/drivers/target/iscsi/iscsi_target_stat.h
new file mode 100644
index 0000000..3ff76b4
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_stat.h
@@ -0,0 +1,64 @@
+#ifndef ISCSI_TARGET_STAT_H
+#define ISCSI_TARGET_STAT_H
+
+/*
+ * For struct iscsi_tiqn->tiqn_wwn default groups
+ */
+extern struct config_item_type iscsi_stat_instance_cit;
+extern struct config_item_type iscsi_stat_sess_err_cit;
+extern struct config_item_type iscsi_stat_tgt_attr_cit;
+extern struct config_item_type iscsi_stat_login_cit;
+extern struct config_item_type iscsi_stat_logout_cit;
+
+/*
+ * For struct iscsi_session->se_sess default groups
+ */
+extern struct config_item_type iscsi_stat_sess_cit;
+
+/* iSCSI session error types */
+#define ISCSI_SESS_ERR_UNKNOWN		0
+#define ISCSI_SESS_ERR_DIGEST		1
+#define ISCSI_SESS_ERR_CXN_TIMEOUT	2
+#define ISCSI_SESS_ERR_PDU_FORMAT	3
+
+/* iSCSI session error stats */
+struct iscsi_sess_err_stats {
+	spinlock_t	lock;
+	u32		digest_errors;
+	u32		cxn_timeout_errors;
+	u32		pdu_format_errors;
+	u32		last_sess_failure_type;
+	char		last_sess_fail_rem_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI login failure types (sub oids) */
+#define ISCSI_LOGIN_FAIL_OTHER		2
+#define ISCSI_LOGIN_FAIL_REDIRECT	3
+#define ISCSI_LOGIN_FAIL_AUTHORIZE	4
+#define ISCSI_LOGIN_FAIL_AUTHENTICATE	5
+#define ISCSI_LOGIN_FAIL_NEGOTIATE	6
+
+/* iSCSI login stats */
+struct iscsi_login_stats {
+	spinlock_t	lock;
+	u32		accepts;
+	u32		other_fails;
+	u32		redirects;
+	u32		authorize_fails;
+	u32		authenticate_fails;
+	u32		negotiate_fails;	/* used for notifications */
+	u64		last_fail_time;		/* time stamp (jiffies) */
+	u32		last_fail_type;
+	int		last_intr_fail_ip_family;
+	unsigned char	last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+	char		last_intr_fail_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI logout stats */
+struct iscsi_logout_stats {
+	spinlock_t	lock;
+	u32		normal_logouts;
+	u32		abnormal_logouts;
+} ____cacheline_aligned;
+
+#endif   /*** ISCSI_TARGET_STAT_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
new file mode 100644
index 0000000..db1fe1e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tmr.c
@@ -0,0 +1,849 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific Task Management functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <asm/unaligned.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tmr.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+u8 iscsit_tmr_abort_task(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_cmd *ref_cmd;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+	struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+	struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+
+	ref_cmd = iscsit_find_cmd_from_itt(conn, hdr->rtt);
+	if (!ref_cmd) {
+		pr_err("Unable to locate RefTaskTag: 0x%08x on CID:"
+			" %hu.\n", hdr->rtt, conn->cid);
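+		/*
+		 * Per RFC 3720, if the referenced task cannot be found but
+		 * its RefCmdSN still falls within the valid CmdSN window,
+		 * the task is presumed to have completed, so answer
+		 * "Function complete" rather than "Task does not exist".
+		 */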
+		return ((hdr->refcmdsn >= conn->sess->exp_cmd_sn) &&
+			(hdr->refcmdsn <= conn->sess->max_cmd_sn)) ?
+			ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK;
+	}
+	if (ref_cmd->cmd_sn != hdr->refcmdsn) {
+		pr_err("RefCmdSN 0x%08x does not equal"
+			" task's CmdSN 0x%08x. Rejecting ABORT_TASK.\n",
+			hdr->refcmdsn, ref_cmd->cmd_sn);
+		return ISCSI_TMF_RSP_REJECTED;
+	}
+
+	se_tmr->ref_task_tag		= hdr->rtt;
+	se_tmr->ref_cmd			= &ref_cmd->se_cmd;
+	tmr_req->ref_cmd_sn		= hdr->refcmdsn;
+	tmr_req->exp_data_sn		= hdr->exp_datasn;
+
+	return ISCSI_TMF_RSP_COMPLETE;
+}
+
+/*
+ *	Called from iscsit_handle_task_mgt_cmd().
+ */
+int iscsit_tmr_task_warm_reset(
+	struct iscsi_conn *conn,
+	struct iscsi_tmr_req *tmr_req,
+	unsigned char *buf)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+#if 0
+	struct iscsi_init_task_mgt_cmnd *hdr =
+		(struct iscsi_init_task_mgt_cmnd *) buf;
+#endif
+	if (!na->tmr_warm_reset) {
+		pr_err("TMR Opcode TARGET_WARM_RESET authorization"
+			" failed for Initiator Node: %s\n",
+			sess->se_sess->se_node_acl->initiatorname);
+		return -1;
+	}
+	/*
+	 * Do the real work in transport_generic_do_tmr().
+	 */
+	return 0;
+}
+
+int iscsit_tmr_task_cold_reset(
+	struct iscsi_conn *conn,
+	struct iscsi_tmr_req *tmr_req,
+	unsigned char *buf)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+	if (!na->tmr_cold_reset) {
+		pr_err("TMR Opcode TARGET_COLD_RESET authorization"
+			" failed for Initiator Node: %s\n",
+			sess->se_sess->se_node_acl->initiatorname);
+		return -1;
+	}
+	/*
+	 * Do the real work in transport_generic_do_tmr().
+	 */
+	return 0;
+}
+
+u8 iscsit_tmr_task_reassign(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_cmd *ref_cmd = NULL;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_conn_recovery *cr = NULL;
+	struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+	struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+	struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+	int ret;
+
+	pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x,"
+		" RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n",
+		hdr->itt, hdr->rtt, hdr->exp_datasn, conn->cid);
+
+	if (conn->sess->sess_ops->ErrorRecoveryLevel != 2) {
+		pr_err("TMR TASK_REASSIGN not supported in ERL<2,"
+				" ignoring request.\n");
+		return ISCSI_TMF_RSP_NOT_SUPPORTED;
+	}
+
+	ret = iscsit_find_cmd_for_recovery(conn->sess, &ref_cmd, &cr, hdr->rtt);
+	if (ret == -2) {
+		pr_err("Command ITT: 0x%08x is still allegiant to CID:"
+			" %hu\n", ref_cmd->init_task_tag, cr->cid);
+		return ISCSI_TMF_RSP_TASK_ALLEGIANT;
+	} else if (ret == -1) {
+		pr_err("Unable to locate RefTaskTag: 0x%08x in"
+			" connection recovery command list.\n", hdr->rtt);
+		return ISCSI_TMF_RSP_NO_TASK;
+	}
+	/*
+	 * Temporary check to prevent connection recovery for
+	 * connections with a differing MaxRecvDataSegmentLength.
+	 */
+	if (cr->maxrecvdatasegmentlength !=
+	    conn->conn_ops->MaxRecvDataSegmentLength) {
+		pr_err("Unable to perform connection recovery for"
+			" differing MaxRecvDataSegmentLength, rejecting"
+			" TMR TASK_REASSIGN.\n");
+		return ISCSI_TMF_RSP_REJECTED;
+	}
+
+	se_tmr->ref_task_tag		= hdr->rtt;
+	se_tmr->ref_cmd			= &ref_cmd->se_cmd;
+	se_tmr->ref_task_lun		= get_unaligned_le64(&hdr->lun);
+	tmr_req->ref_cmd_sn		= hdr->refcmdsn;
+	tmr_req->exp_data_sn		= hdr->exp_datasn;
+	tmr_req->conn_recovery		= cr;
+	tmr_req->task_reassign		= 1;
+	/*
+	 * Command can now be reassigned to a new connection.
+	 * The task management response must be sent before the
+	 * reassignment actually happens.  See iscsi_tmr_post_handler().
+	 */
+	return ISCSI_TMF_RSP_COMPLETE;
+}
+
+static void iscsit_task_reassign_remove_cmd(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn_recovery *cr,
+	struct iscsi_session *sess)
+{
+	int ret;
+
+	spin_lock(&cr->conn_recovery_cmd_lock);
+	ret = iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+	spin_unlock(&cr->conn_recovery_cmd_lock);
+	if (!ret) {
+		pr_debug("iSCSI connection recovery successful for CID:"
+			" %hu on SID: %u\n", cr->cid, sess->sid);
+		iscsit_remove_active_connection_recovery_entry(cr, sess);
+	}
+}
+
+static int iscsit_task_reassign_complete_nop_out(
+	struct iscsi_tmr_req *tmr_req,
+	struct iscsi_conn *conn)
+{
+	struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+	struct se_cmd *se_cmd = se_tmr->ref_cmd;
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+	struct iscsi_conn_recovery *cr;
+
+	if (!cmd->cr) {
+		pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+			" is NULL!\n", cmd->init_task_tag);
+		return -1;
+	}
+	cr = cmd->cr;
+
+	/*
+	 * Reset the StatSN so a new one for this command's new connection
+	 * will be assigned.
+	 * Reset the ExpStatSN as well so we may receive Status SNACKs.
+	 */
+	cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+	iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	cmd->i_state = ISTATE_SEND_NOPIN;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+	return 0;
+}
+
+static int iscsit_task_reassign_complete_write(
+	struct iscsi_cmd *cmd,
+	struct iscsi_tmr_req *tmr_req)
+{
+	int no_build_r2ts = 0;
+	u32 length = 0, offset = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+	/*
+	 * The Initiator must not send an R2T SNACK with a BegRun less than
+	 * the TMR TASK_REASSIGN's ExpDataSN.
+	 */
+	if (!tmr_req->exp_data_sn) {
+		cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+		cmd->acked_data_sn = 0;
+	} else {
+		cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+		cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+	}
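+	/*
+	 * A non-zero ExpDataSN implicitly acknowledges everything below
+	 * it, hence the exp_data_sn - 1 high-water mark recorded above.
+	 */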
+
+	/*
+	 * The TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN the
+	 * Initiator is expecting.  The Target controls all WRITE operations,
+	 * so if we have received all DataOUT we can safely ignore the
+	 * Initiator.
+	 */
+	if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+		if (!atomic_read(&cmd->transport_sent)) {
+			pr_debug("WRITE ITT: 0x%08x: t_state: %d"
+				" never sent to transport\n",
+				cmd->init_task_tag, cmd->se_cmd.t_state);
+			return transport_generic_handle_data(se_cmd);
+		}
+
+		cmd->i_state = ISTATE_SEND_STATUS;
+		iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+		return 0;
+	}
+
+	/*
+	 * Special case to deal with DataSequenceInOrder=No and Non-Immediate
+	 * Unsolicited DataOut.
+	 */
+	if (cmd->unsolicited_data) {
+		cmd->unsolicited_data = 0;
+
+		offset = cmd->next_burst_len = cmd->write_data_done;
+
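+		/*
+		 * If the remaining FirstBurstLength allowance covers the
+		 * rest of the transfer, the single recovery R2T added
+		 * below suffices; otherwise re-request only the remainder
+		 * of the first burst and let iscsit_build_r2ts_for_cmd()
+		 * handle the rest.
+		 */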
+		if ((conn->sess->sess_ops->FirstBurstLength - offset) >=
+		     cmd->data_length) {
+			no_build_r2ts = 1;
+			length = (cmd->data_length - offset);
+		} else
+			length = (conn->sess->sess_ops->FirstBurstLength - offset);
+
+		spin_lock_bh(&cmd->r2t_lock);
+		if (iscsit_add_r2t_to_list(cmd, offset, length, 0, 0) < 0) {
+			spin_unlock_bh(&cmd->r2t_lock);
+			return -1;
+		}
+		cmd->outstanding_r2ts++;
+		spin_unlock_bh(&cmd->r2t_lock);
+
+		if (no_build_r2ts)
+			return 0;
+	}
+	/*
+	 * iscsit_build_r2ts_for_cmd() can handle the rest from here.
+	 */
+	return iscsit_build_r2ts_for_cmd(cmd, conn, 2);
+}
+
+static int iscsit_task_reassign_complete_read(
+	struct iscsi_cmd *cmd,
+	struct iscsi_tmr_req *tmr_req)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_datain_req *dr;
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+	/*
+	 * The Initiator must not send a Data SNACK with a BegRun less than
+	 * the TMR TASK_REASSIGN's ExpDataSN.
+	 */
+	if (!tmr_req->exp_data_sn) {
+		cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+		cmd->acked_data_sn = 0;
+	} else {
+		cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+		cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+	}
+
+	if (!atomic_read(&cmd->transport_sent)) {
+		pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
+			" transport\n", cmd->init_task_tag,
+			cmd->se_cmd.t_state);
+		transport_generic_handle_cdb(se_cmd);
+		return 0;
+	}
+
+	if (!atomic_read(&se_cmd->t_transport_complete)) {
+		pr_err("READ ITT: 0x%08x: t_state: %d, never returned"
+			" from transport\n", cmd->init_task_tag,
+			cmd->se_cmd.t_state);
+		return -1;
+	}
+
+	dr = iscsit_allocate_datain_req();
+	if (!dr)
+		return -1;
+	/*
+	 * The TMR TASK_REASSIGN's ExpDataSN contains the next DataSN the
+	 * Initiator is expecting.
+	 */
+	dr->data_sn = dr->begrun = tmr_req->exp_data_sn;
+	dr->runlength = 0;
+	dr->generate_recovery_values = 1;
+	dr->recovery = DATAIN_CONNECTION_RECOVERY;
+
+	iscsit_attach_datain_req(cmd, dr);
+
+	cmd->i_state = ISTATE_SEND_DATAIN;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+	return 0;
+}
+
+static int iscsit_task_reassign_complete_none(
+	struct iscsi_cmd *cmd,
+	struct iscsi_tmr_req *tmr_req)
+{
+	struct iscsi_conn *conn = cmd->conn;
+
+	cmd->i_state = ISTATE_SEND_STATUS;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+	return 0;
+}
+
+static int iscsit_task_reassign_complete_scsi_cmnd(
+	struct iscsi_tmr_req *tmr_req,
+	struct iscsi_conn *conn)
+{
+	struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+	struct se_cmd *se_cmd = se_tmr->ref_cmd;
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+	struct iscsi_conn_recovery *cr;
+
+	if (!cmd->cr) {
+		pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+			" is NULL!\n", cmd->init_task_tag);
+		return -1;
+	}
+	cr = cmd->cr;
+
+	/*
+	 * Reset the StatSN so a new one for this command's new connection
+	 * will be assigned.
+	 * Reset the ExpStatSN as well so we may receive Status SNACKs.
+	 */
+	cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+	iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
+		cmd->i_state = ISTATE_SEND_STATUS;
+		iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+		return 0;
+	}
+
+	switch (cmd->data_direction) {
+	case DMA_TO_DEVICE:
+		return iscsit_task_reassign_complete_write(cmd, tmr_req);
+	case DMA_FROM_DEVICE:
+		return iscsit_task_reassign_complete_read(cmd, tmr_req);
+	case DMA_NONE:
+		return iscsit_task_reassign_complete_none(cmd, tmr_req);
+	default:
+		pr_err("Unknown cmd->data_direction: 0x%02x\n",
+				cmd->data_direction);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int iscsit_task_reassign_complete(
+	struct iscsi_tmr_req *tmr_req,
+	struct iscsi_conn *conn)
+{
+	struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+	struct se_cmd *se_cmd;
+	struct iscsi_cmd *cmd;
+	int ret = 0;
+
+	if (!se_tmr->ref_cmd) {
+		pr_err("TMR Request is missing a RefCmd struct iscsi_cmd.\n");
+		return -1;
+	}
+	se_cmd = se_tmr->ref_cmd;
+	cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	cmd->conn = conn;
+
+	switch (cmd->iscsi_opcode) {
+	case ISCSI_OP_NOOP_OUT:
+		ret = iscsit_task_reassign_complete_nop_out(tmr_req, conn);
+		break;
+	case ISCSI_OP_SCSI_CMD:
+		ret = iscsit_task_reassign_complete_scsi_cmnd(tmr_req, conn);
+		break;
+	default:
+		pr_err("Illegal iSCSI Opcode 0x%02x during"
+			" command reallegiance\n", cmd->iscsi_opcode);
+		return -1;
+	}
+
+	if (ret != 0)
+		return ret;
+
+	pr_debug("Completed connection reallegiance for Opcode: 0x%02x,"
+		" ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode,
+			cmd->init_task_tag, conn->cid);
+
+	return 0;
+}
+
+/*
+ *	Handles special after-the-fact actions related to TMRs.
+ *	Right now the only one it is really needed for is
+ *	connection recovery related TASK_REASSIGN.
+ */
+int iscsit_tmr_post_handler(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+	struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+	struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+
+	if (tmr_req->task_reassign &&
+	   (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+		return iscsit_task_reassign_complete(tmr_req, conn);
+
+	return 0;
+}
+
+/*
+ *	Nothing to do here, but leave it for good measure. :-)
+ */
+int iscsit_task_reassign_prepare_read(
+	struct iscsi_tmr_req *tmr_req,
+	struct iscsi_conn *conn)
+{
+	return 0;
+}
+
+static void iscsit_task_reassign_prepare_unsolicited_dataout(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	int i, j;
+	struct iscsi_pdu *pdu = NULL;
+	struct iscsi_seq *seq = NULL;
+
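+	/*
+	 * Rewind the unsolicited (first burst) bookkeeping so the
+	 * Initiator can resend it on the new connection: with
+	 * DataPDUInOrder=Yes the whole first burst is rolled back, while
+	 * with DataPDUInOrder=No only the PDUs actually received within
+	 * the unsolicited sequence are marked not-received again.
+	 */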
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		cmd->data_sn = 0;
+
+		if (cmd->immediate_data)
+			cmd->r2t_offset += (cmd->first_burst_len -
+				cmd->seq_start_offset);
+
+		if (conn->sess->sess_ops->DataPDUInOrder) {
+			cmd->write_data_done -= (cmd->immediate_data) ?
+						(cmd->first_burst_len -
+						 cmd->seq_start_offset) :
+						 cmd->first_burst_len;
+			cmd->first_burst_len = 0;
+			return;
+		}
+
+		for (i = 0; i < cmd->pdu_count; i++) {
+			pdu = &cmd->pdu_list[i];
+
+			if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+				continue;
+
+			if ((pdu->offset >= cmd->seq_start_offset) &&
+			   ((pdu->offset + pdu->length) <=
+			     cmd->seq_end_offset)) {
+				cmd->first_burst_len -= pdu->length;
+				cmd->write_data_done -= pdu->length;
+				pdu->status = ISCSI_PDU_NOT_RECEIVED;
+			}
+		}
+	} else {
+		for (i = 0; i < cmd->seq_count; i++) {
+			seq = &cmd->seq_list[i];
+
+			if (seq->type != SEQTYPE_UNSOLICITED)
+				continue;
+
+			cmd->write_data_done -=
+					(seq->offset - seq->orig_offset);
+			cmd->first_burst_len = 0;
+			seq->data_sn = 0;
+			seq->offset = seq->orig_offset;
+			seq->next_burst_len = 0;
+			seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+			if (conn->sess->sess_ops->DataPDUInOrder)
+				continue;
+
+			for (j = 0; j < seq->pdu_count; j++) {
+				pdu = &cmd->pdu_list[j+seq->pdu_start];
+
+				if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+					continue;
+
+				pdu->status = ISCSI_PDU_NOT_RECEIVED;
+			}
+		}
+	}
+}
+
+int iscsit_task_reassign_prepare_write(
+	struct iscsi_tmr_req *tmr_req,
+	struct iscsi_conn *conn)
+{
+	struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+	struct se_cmd *se_cmd = se_tmr->ref_cmd;
+	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+	struct iscsi_pdu *pdu = NULL;
+	struct iscsi_r2t *r2t = NULL, *r2t_tmp;
+	int first_incomplete_r2t = 1, i = 0;
+
+	/*
+	 * The command was in the process of receiving Unsolicited DataOUT when
+	 * the connection failed.
+	 */
+	if (cmd->unsolicited_data)
+		iscsit_task_reassign_prepare_unsolicited_dataout(cmd, conn);
+
+	/*
+	 * The Initiator is requesting R2Ts starting from zero, so skip
+	 * the acknowledged R2Ts and start checking struct iscsi_r2t
+	 * entries with R2TSN greater than zero.
+	 */
+	if (!tmr_req->exp_data_sn)
+		goto drop_unacknowledged_r2ts;
+
+	/*
+	 * We now check that the PDUs in DataOUT sequences below
+	 * the TMR TASK_REASSIGN ExpDataSN (R2TSN the Initiator is
+	 * expecting next) have all the DataOUT they require to complete
+	 * the DataOUT sequence.  First scan from R2TSN 0 to TMR
+	 * TASK_REASSIGN ExpDataSN-1.
+	 *
+	 * If we have not received all DataOUT in question, we must
+	 * make sure to make the appropriate changes to values in
+	 * struct iscsi_cmd (and elsewhere depending on session parameters)
+	 * so iscsit_build_r2ts_for_cmd() in iscsit_task_reassign_complete_write()
+	 * will resend a new R2T for the DataOUT sequences in question.
+	 */
+	spin_lock_bh(&cmd->r2t_lock);
+	if (list_empty(&cmd->cmd_r2t_list)) {
+		spin_unlock_bh(&cmd->r2t_lock);
+		return -1;
+	}
+
+	list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+
+		if (r2t->r2t_sn >= tmr_req->exp_data_sn)
+			continue;
+		/*
+		 * Safely ignore Recovery R2Ts and R2Ts that have completed
+		 * DataOUT sequences.
+		 */
+		if (r2t->seq_complete)
+			continue;
+
+		if (r2t->recovery_r2t)
+			continue;
+
+		/*
+		 *                 DataSequenceInOrder=Yes:
+		 *
+		 * Taking into account the iSCSI implementation requirement of
+		 * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+		 * DataSequenceInOrder=Yes, we must take into consideration
+		 * the following:
+		 *
+		 *                  DataSequenceInOrder=No:
+		 *
+		 * Taking into account that the Initiator controls the (possibly
+		 * random) PDU Order in (possibly random) Sequence Order of
+		 * DataOUT the target requests with R2Ts, we must take into
+		 * consideration the following:
+		 *
+		 *      DataPDUInOrder=Yes for DataSequenceInOrder=[Yes,No]:
+		 *
+		 * While processing non-complete R2T DataOUT sequence requests
+		 * the Target will re-request only the total sequence length
+		 * minus current received offset.  This is because we must
+		 * assume the initiator will continue sending DataOUT from the
+		 * last PDU before the connection failed.
+		 *
+		 *      DataPDUInOrder=No for DataSequenceInOrder=[Yes,No]:
+		 *
+		 * While processing non-complete R2T DataOUT sequence requests
+		 * the Target will re-request the entire DataOUT sequence if
+		 * any single PDU is missing from the sequence.  This is because
+		 * we have no logical method to determine the next PDU offset,
+		 * and we must assume the Initiator will be sending any random
+		 * PDU offset in the current sequence after TASK_REASSIGN
+		 * has completed.
+		 */
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if (!first_incomplete_r2t) {
+				cmd->r2t_offset -= r2t->xfer_len;
+				goto next;
+			}
+
+			if (conn->sess->sess_ops->DataPDUInOrder) {
+				cmd->data_sn = 0;
+				cmd->r2t_offset -= (r2t->xfer_len -
+					cmd->next_burst_len);
+				first_incomplete_r2t = 0;
+				goto next;
+			}
+
+			cmd->data_sn = 0;
+			cmd->r2t_offset -= r2t->xfer_len;
+
+			for (i = 0; i < cmd->pdu_count; i++) {
+				pdu = &cmd->pdu_list[i];
+
+				if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+					continue;
+
+				if ((pdu->offset >= r2t->offset) &&
+				    (pdu->offset < (r2t->offset +
+						r2t->xfer_len))) {
+					cmd->next_burst_len -= pdu->length;
+					cmd->write_data_done -= pdu->length;
+					pdu->status = ISCSI_PDU_NOT_RECEIVED;
+				}
+			}
+
+			first_incomplete_r2t = 0;
+		} else {
+			struct iscsi_seq *seq;
+
+			seq = iscsit_get_seq_holder(cmd, r2t->offset,
+					r2t->xfer_len);
+			if (!seq) {
+				spin_unlock_bh(&cmd->r2t_lock);
+				return -1;
+			}
+
+			cmd->write_data_done -=
+					(seq->offset - seq->orig_offset);
+			seq->data_sn = 0;
+			seq->offset = seq->orig_offset;
+			seq->next_burst_len = 0;
+			seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+			cmd->seq_send_order--;
+
+			if (conn->sess->sess_ops->DataPDUInOrder)
+				goto next;
+
+			for (i = 0; i < seq->pdu_count; i++) {
+				pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+				if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+					continue;
+
+				pdu->status = ISCSI_PDU_NOT_RECEIVED;
+			}
+		}
+
+next:
+		cmd->outstanding_r2ts--;
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	/*
+	 * We now drop all unacknowledged R2Ts, i.e. ExpDataSN from TMR
+	 * TASK_REASSIGN to the last R2T in the list.  We are also careful
+	 * to check that the Initiator is not requesting R2Ts for DataOUT
+	 * sequences it has already completed.
+	 *
+	 * Free each R2T in question and adjust values in struct iscsi_cmd
+	 * accordingly so iscsit_build_r2ts_for_cmd() does the rest of
+	 * the work after the TMR TASK_REASSIGN Response is sent.
+	 */
+drop_unacknowledged_r2ts:
+
+	cmd->cmd_flags &= ~ICF_SENT_LAST_R2T;
+	cmd->r2t_sn = tmr_req->exp_data_sn;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) {
+		/*
+		 * Skip up to the R2T Sequence number provided by the
+		 * iSCSI TASK_REASSIGN TMR
+		 */
+		if (r2t->r2t_sn < tmr_req->exp_data_sn)
+			continue;
+
+		if (r2t->seq_complete) {
+			pr_err("Initiator is requesting R2Ts from"
+				" R2TSN: 0x%08x, but R2TSN: 0x%08x, Offset: %u,"
+				" Length: %u is already complete."
+				"   BAD INITIATOR ERL=2 IMPLEMENTATION!\n",
+				tmr_req->exp_data_sn, r2t->r2t_sn,
+				r2t->offset, r2t->xfer_len);
+			spin_unlock_bh(&cmd->r2t_lock);
+			return -1;
+		}
+
+		if (r2t->recovery_r2t) {
+			iscsit_free_r2t(r2t, cmd);
+			continue;
+		}
+
+		/*		   DataSequenceInOrder=Yes:
+		 *
+		 * Taking into account the iSCSI implementation requirement of
+		 * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+		 * DataSequenceInOrder=Yes, it's safe to subtract the R2T's
+		 * entire transfer length from the command's R2T offset marker.
+		 *
+		 *		   DataSequenceInOrder=No:
+		 *
+		 * We subtract the struct iscsi_seq difference between the
+		 * current and original offsets from cmd->write_data_done
+		 * to account for DataOUT PDUs already received.  Then reset
+		 * the current offset to the original and zero out the current
+		 * burst length,  to make sure we re-request the entire DataOUT
+		 * sequence.
+		 */
+		if (conn->sess->sess_ops->DataSequenceInOrder)
+			cmd->r2t_offset -= r2t->xfer_len;
+		else
+			cmd->seq_send_order--;
+
+		cmd->outstanding_r2ts--;
+		iscsit_free_r2t(r2t, cmd);
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	return 0;
+}
+
+/*
+ *	Performs sanity checks on a TMR TASK_REASSIGN's ExpDataSN for
+ *	a given struct iscsi_cmd.
+ */
+int iscsit_check_task_reassign_expdatasn(
+	struct iscsi_tmr_req *tmr_req,
+	struct iscsi_conn *conn)
+{
+	struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+	struct se_cmd *se_cmd = se_tmr->ref_cmd;
+	struct iscsi_cmd *ref_cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+	if (ref_cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD)
+		return 0;
+
+	if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION)
+		return 0;
+
+	if (ref_cmd->data_direction == DMA_NONE)
+		return 0;
+
+	/*
+	 * For READs the TMR TASK_REASSIGN's ExpDataSN contains the next DataSN
+	 * of DataIN the Initiator is expecting.
+	 *
+	 * Also check that the Initiator is not re-requesting DataIN that has
+	 * already been acknowledged with a DataAck SNACK.
+	 */
+	if (ref_cmd->data_direction == DMA_FROM_DEVICE) {
+		if (tmr_req->exp_data_sn > ref_cmd->data_sn) {
+			pr_err("Received ExpDataSN: 0x%08x for READ"
+				" in TMR TASK_REASSIGN greater than command's"
+				" DataSN: 0x%08x.\n", tmr_req->exp_data_sn,
+				ref_cmd->data_sn);
+			return -1;
+		}
+		if ((ref_cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+		    (tmr_req->exp_data_sn <= ref_cmd->acked_data_sn)) {
+			pr_err("Received ExpDataSN: 0x%08x for READ"
+				" in TMR TASK_REASSIGN for previously"
+				" acknowledged DataIN: 0x%08x,"
+				" protocol error\n", tmr_req->exp_data_sn,
+				ref_cmd->acked_data_sn);
+			return -1;
+		}
+		return iscsit_task_reassign_prepare_read(tmr_req, conn);
+	}
+
+	/*
+	 * For WRITEs the TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN
+	 * for R2Ts the Initiator is expecting.
+	 *
+	 * Do the magic in iscsit_task_reassign_prepare_write().
+	 */
+	if (ref_cmd->data_direction == DMA_TO_DEVICE) {
+		if (tmr_req->exp_data_sn > ref_cmd->r2t_sn) {
+			pr_err("Received ExpDataSN: 0x%08x for WRITE"
+				" in TMR TASK_REASSIGN greater than command's"
+				" R2TSN: 0x%08x.\n", tmr_req->exp_data_sn,
+					ref_cmd->r2t_sn);
+			return -1;
+		}
+		return iscsit_task_reassign_prepare_write(tmr_req, conn);
+	}
+
+	pr_err("Unknown iSCSI data_direction: 0x%02x\n",
+			ref_cmd->data_direction);
+
+	return -1;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tmr.h b/drivers/target/iscsi/iscsi_target_tmr.h
new file mode 100644
index 0000000..142e992
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tmr.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_TMR_H
+#define ISCSI_TARGET_TMR_H
+
+extern u8 iscsit_tmr_abort_task(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_task_warm_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+			unsigned char *);
+extern int iscsit_tmr_task_cold_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+			unsigned char *);
+extern u8 iscsit_tmr_task_reassign(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_check_task_reassign_expdatasn(struct iscsi_tmr_req *,
+			struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_TMR_H */
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
new file mode 100644
index 0000000..d4cf2cd
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -0,0 +1,759 @@
+/*******************************************************************************
+ * This file contains iSCSI Target Portal Group related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nodeattrib.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_parameters.h"
+
+struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u16 tpgt)
+{
+	struct iscsi_portal_group *tpg;
+
+	tpg = kzalloc(sizeof(struct iscsi_portal_group), GFP_KERNEL);
+	if (!tpg) {
+		pr_err("Unable to allocate struct iscsi_portal_group\n");
+		return NULL;
+	}
+
+	tpg->tpgt = tpgt;
+	tpg->tpg_state = TPG_STATE_FREE;
+	tpg->tpg_tiqn = tiqn;
+	INIT_LIST_HEAD(&tpg->tpg_gnp_list);
+	INIT_LIST_HEAD(&tpg->tpg_list);
+	mutex_init(&tpg->tpg_access_lock);
+	mutex_init(&tpg->np_login_lock);
+	spin_lock_init(&tpg->tpg_state_lock);
+	spin_lock_init(&tpg->tpg_np_lock);
+
+	return tpg;
+}
+
+static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *);
+
+int iscsit_load_discovery_tpg(void)
+{
+	struct iscsi_param *param;
+	struct iscsi_portal_group *tpg;
+	int ret;
+
+	tpg = iscsit_alloc_portal_group(NULL, 1);
+	if (!tpg) {
+		pr_err("Unable to allocate struct iscsi_portal_group\n");
+		return -1;
+	}
+
+	ret = core_tpg_register(
+			&lio_target_fabric_configfs->tf_ops,
+			NULL, &tpg->tpg_se_tpg, (void *)tpg,
+			TRANSPORT_TPG_TYPE_DISCOVERY);
+	if (ret < 0) {
+		kfree(tpg);
+		return -1;
+	}
+
+	tpg->sid = 1; /* First Assigned LIO Session ID */
+	iscsit_set_default_tpg_attribs(tpg);
+
+	if (iscsi_create_default_params(&tpg->param_list) < 0)
+		goto out;
+	/*
+	 * By default we disable authentication for discovery sessions;
+	 * this can be changed via:
+	 *
+	 * /sys/kernel/config/target/iscsi/discovery_auth/enforce_discovery_auth
+	 */
+	param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+	if (!param)
+		goto out;
+
+	if (iscsi_update_param_value(param, "CHAP,None") < 0)
+		goto out;
+
+	tpg->tpg_attrib.authentication = 0;
+
+	spin_lock(&tpg->tpg_state_lock);
+	tpg->tpg_state  = TPG_STATE_ACTIVE;
+	spin_unlock(&tpg->tpg_state_lock);
+
+	iscsit_global->discovery_tpg = tpg;
+	pr_debug("CORE[0] - Allocated Discovery TPG\n");
+
+	return 0;
+out:
+	if (tpg->sid == 1)
+		core_tpg_deregister(&tpg->tpg_se_tpg);
+	kfree(tpg);
+	return -1;
+}
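+/*
+ * For reference, a hypothetical userspace sketch (not part of this code)
+ * that re-enables discovery authentication through the configfs attribute
+ * named above:
+ *
+ *	FILE *f = fopen("/sys/kernel/config/target/iscsi/discovery_auth/"
+ *			"enforce_discovery_auth", "w");
+ *	if (f) {
+ *		fputs("1", f);
+ *		fclose(f);
+ *	}
+ *
+ * where writing "1" is assumed to enforce CHAP and "0" to allow None.
+ */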
+
+void iscsit_release_discovery_tpg(void)
+{
+	struct iscsi_portal_group *tpg = iscsit_global->discovery_tpg;
+
+	if (!tpg)
+		return;
+
+	core_tpg_deregister(&tpg->tpg_se_tpg);
+
+	kfree(tpg);
+	iscsit_global->discovery_tpg = NULL;
+}
+
+struct iscsi_portal_group *iscsit_get_tpg_from_np(
+	struct iscsi_tiqn *tiqn,
+	struct iscsi_np *np)
+{
+	struct iscsi_portal_group *tpg = NULL;
+	struct iscsi_tpg_np *tpg_np;
+
+	spin_lock(&tiqn->tiqn_tpg_lock);
+	list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+		spin_lock(&tpg->tpg_state_lock);
+		if (tpg->tpg_state == TPG_STATE_FREE) {
+			spin_unlock(&tpg->tpg_state_lock);
+			continue;
+		}
+		spin_unlock(&tpg->tpg_state_lock);
+
+		spin_lock(&tpg->tpg_np_lock);
+		list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
+			if (tpg_np->tpg_np == np) {
+				spin_unlock(&tpg->tpg_np_lock);
+				spin_unlock(&tiqn->tiqn_tpg_lock);
+				return tpg;
+			}
+		}
+		spin_unlock(&tpg->tpg_np_lock);
+	}
+	spin_unlock(&tiqn->tiqn_tpg_lock);
+
+	return NULL;
+}
+
+int iscsit_get_tpg(
+	struct iscsi_portal_group *tpg)
+{
+	int ret;
+
+	ret = mutex_lock_interruptible(&tpg->tpg_access_lock);
+	return ((ret != 0) || signal_pending(current)) ? -1 : 0;
+}
+
+void iscsit_put_tpg(struct iscsi_portal_group *tpg)
+{
+	mutex_unlock(&tpg->tpg_access_lock);
+}
+
+static void iscsit_clear_tpg_np_login_thread(
+	struct iscsi_tpg_np *tpg_np,
+	struct iscsi_portal_group *tpg)
+{
+	if (!tpg_np->tpg_np) {
+		pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
+		return;
+	}
+
+	iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg);
+}
+
+void iscsit_clear_tpg_np_login_threads(
+	struct iscsi_portal_group *tpg)
+{
+	struct iscsi_tpg_np *tpg_np;
+
+	spin_lock(&tpg->tpg_np_lock);
+	list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
+		if (!tpg_np->tpg_np) {
+			pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
+			continue;
+		}
+		spin_unlock(&tpg->tpg_np_lock);
+		iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+		spin_lock(&tpg->tpg_np_lock);
+	}
+	spin_unlock(&tpg->tpg_np_lock);
+}
+
+void iscsit_tpg_dump_params(struct iscsi_portal_group *tpg)
+{
+	iscsi_print_params(tpg->param_list);
+}
+
+static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	a->authentication = TA_AUTHENTICATION;
+	a->login_timeout = TA_LOGIN_TIMEOUT;
+	a->netif_timeout = TA_NETIF_TIMEOUT;
+	a->default_cmdsn_depth = TA_DEFAULT_CMDSN_DEPTH;
+	a->generate_node_acls = TA_GENERATE_NODE_ACLS;
+	a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS;
+	a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT;
+	a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT;
+}
+
+int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
+{
+	if (tpg->tpg_state != TPG_STATE_FREE) {
+		pr_err("Unable to add iSCSI Target Portal Group: %d"
+			" while not in TPG_STATE_FREE state.\n", tpg->tpgt);
+		return -EEXIST;
+	}
+	iscsit_set_default_tpg_attribs(tpg);
+
+	if (iscsi_create_default_params(&tpg->param_list) < 0)
+		goto err_out;
+
+	ISCSI_TPG_ATTRIB(tpg)->tpg = tpg;
+
+	spin_lock(&tpg->tpg_state_lock);
+	tpg->tpg_state	= TPG_STATE_INACTIVE;
+	spin_unlock(&tpg->tpg_state_lock);
+
+	spin_lock(&tiqn->tiqn_tpg_lock);
+	list_add_tail(&tpg->tpg_list, &tiqn->tiqn_tpg_list);
+	tiqn->tiqn_ntpgs++;
+	pr_debug("CORE[%s]_TPG[%hu] - Added iSCSI Target Portal Group\n",
+			tiqn->tiqn, tpg->tpgt);
+	spin_unlock(&tiqn->tiqn_tpg_lock);
+
+	return 0;
+err_out:
+	if (tpg->param_list) {
+		iscsi_release_param_list(tpg->param_list);
+		tpg->param_list = NULL;
+	}
+	kfree(tpg);
+	return -ENOMEM;
+}
+
+int iscsit_tpg_del_portal_group(
+	struct iscsi_tiqn *tiqn,
+	struct iscsi_portal_group *tpg,
+	int force)
+{
+	u8 old_state = tpg->tpg_state;
+
+	spin_lock(&tpg->tpg_state_lock);
+	tpg->tpg_state = TPG_STATE_INACTIVE;
+	spin_unlock(&tpg->tpg_state_lock);
+
+	if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+		pr_err("Unable to delete iSCSI Target Portal Group:"
+			" %hu while active sessions exist, and force=0\n",
+			tpg->tpgt);
+		tpg->tpg_state = old_state;
+		return -EPERM;
+	}
+
+	core_tpg_clear_object_luns(&tpg->tpg_se_tpg);
+
+	if (tpg->param_list) {
+		iscsi_release_param_list(tpg->param_list);
+		tpg->param_list = NULL;
+	}
+
+	core_tpg_deregister(&tpg->tpg_se_tpg);
+
+	spin_lock(&tpg->tpg_state_lock);
+	tpg->tpg_state = TPG_STATE_FREE;
+	spin_unlock(&tpg->tpg_state_lock);
+
+	spin_lock(&tiqn->tiqn_tpg_lock);
+	tiqn->tiqn_ntpgs--;
+	list_del(&tpg->tpg_list);
+	spin_unlock(&tiqn->tiqn_tpg_lock);
+
+	pr_debug("CORE[%s]_TPG[%hu] - Deleted iSCSI Target Portal Group\n",
+			tiqn->tiqn, tpg->tpgt);
+
+	kfree(tpg);
+	return 0;
+}
+
+int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
+{
+	struct iscsi_param *param;
+	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+	spin_lock(&tpg->tpg_state_lock);
+	if (tpg->tpg_state == TPG_STATE_ACTIVE) {
+		pr_err("iSCSI target portal group: %hu is already"
+			" active, ignoring request.\n", tpg->tpgt);
+		spin_unlock(&tpg->tpg_state_lock);
+		return -EINVAL;
+	}
+	/*
+	 * Make sure that AuthMethod does not contain None as an option
+	 * unless authentication has been explicitly disabled.  If
+	 * authentication is enforced (the default), fall back to CHAP
+	 * and remove the None option.
+	 */
+	param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+	if (!param) {
+		spin_unlock(&tpg->tpg_state_lock);
+		return -ENOMEM;
+	}
+
+	if (ISCSI_TPG_ATTRIB(tpg)->authentication) {
+		if (!strcmp(param->value, NONE))
+			if (iscsi_update_param_value(param, CHAP) < 0) {
+				spin_unlock(&tpg->tpg_state_lock);
+				return -ENOMEM;
+			}
+		if (iscsit_ta_authentication(tpg, 1) < 0) {
+			spin_unlock(&tpg->tpg_state_lock);
+			return -ENOMEM;
+		}
+	}
+
+	tpg->tpg_state = TPG_STATE_ACTIVE;
+	spin_unlock(&tpg->tpg_state_lock);
+
+	spin_lock(&tiqn->tiqn_tpg_lock);
+	tiqn->tiqn_active_tpgs++;
+	pr_debug("iSCSI_TPG[%hu] - Enabled iSCSI Target Portal Group\n",
+			tpg->tpgt);
+	spin_unlock(&tiqn->tiqn_tpg_lock);
+
+	return 0;
+}
+
+int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force)
+{
+	struct iscsi_tiqn *tiqn;
+	u8 old_state = tpg->tpg_state;
+
+	spin_lock(&tpg->tpg_state_lock);
+	if (tpg->tpg_state == TPG_STATE_INACTIVE) {
+		pr_err("iSCSI Target Portal Group: %hu is already"
+			" inactive, ignoring request.\n", tpg->tpgt);
+		spin_unlock(&tpg->tpg_state_lock);
+		return -EINVAL;
+	}
+	tpg->tpg_state = TPG_STATE_INACTIVE;
+	spin_unlock(&tpg->tpg_state_lock);
+
+	iscsit_clear_tpg_np_login_threads(tpg);
+
+	if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+		spin_lock(&tpg->tpg_state_lock);
+		tpg->tpg_state = old_state;
+		spin_unlock(&tpg->tpg_state_lock);
+		pr_err("Unable to disable iSCSI Target Portal Group:"
+			" %hu while active sessions exist, and force=0\n",
+			tpg->tpgt);
+		return -EPERM;
+	}
+
+	tiqn = tpg->tpg_tiqn;
+	if (!tiqn || (tpg == iscsit_global->discovery_tpg))
+		return 0;
+
+	spin_lock(&tiqn->tiqn_tpg_lock);
+	tiqn->tiqn_active_tpgs--;
+	pr_debug("iSCSI_TPG[%hu] - Disabled iSCSI Target Portal Group\n",
+			tpg->tpgt);
+	spin_unlock(&tiqn->tiqn_tpg_lock);
+
+	return 0;
+}
+
+struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(
+	struct iscsi_session *sess)
+{
+	struct se_session *se_sess = sess->se_sess;
+	struct se_node_acl *se_nacl = se_sess->se_node_acl;
+	struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl,
+					se_node_acl);
+
+	return &acl->node_attrib;
+}
+
+struct iscsi_tpg_np *iscsit_tpg_locate_child_np(
+	struct iscsi_tpg_np *tpg_np,
+	int network_transport)
+{
+	struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+
+	spin_lock(&tpg_np->tpg_np_parent_lock);
+	list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+			&tpg_np->tpg_np_parent_list, tpg_np_child_list) {
+		if (tpg_np_child->tpg_np->np_network_transport ==
+				network_transport) {
+			spin_unlock(&tpg_np->tpg_np_parent_lock);
+			return tpg_np_child;
+		}
+	}
+	spin_unlock(&tpg_np->tpg_np_parent_lock);
+
+	return NULL;
+}
+
+struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
+	struct iscsi_portal_group *tpg,
+	struct __kernel_sockaddr_storage *sockaddr,
+	char *ip_str,
+	struct iscsi_tpg_np *tpg_np_parent,
+	int network_transport)
+{
+	struct iscsi_np *np;
+	struct iscsi_tpg_np *tpg_np;
+
+	tpg_np = kzalloc(sizeof(struct iscsi_tpg_np), GFP_KERNEL);
+	if (!tpg_np) {
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_tpg_np.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	np = iscsit_add_np(sockaddr, ip_str, network_transport);
+	if (IS_ERR(np)) {
+		kfree(tpg_np);
+		return ERR_CAST(np);
+	}
+
+	INIT_LIST_HEAD(&tpg_np->tpg_np_list);
+	INIT_LIST_HEAD(&tpg_np->tpg_np_child_list);
+	INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list);
+	spin_lock_init(&tpg_np->tpg_np_parent_lock);
+	tpg_np->tpg_np		= np;
+	tpg_np->tpg		= tpg;
+
+	spin_lock(&tpg->tpg_np_lock);
+	list_add_tail(&tpg_np->tpg_np_list, &tpg->tpg_gnp_list);
+	tpg->num_tpg_nps++;
+	if (tpg->tpg_tiqn)
+		tpg->tpg_tiqn->tiqn_num_tpg_nps++;
+	spin_unlock(&tpg->tpg_np_lock);
+
+	if (tpg_np_parent) {
+		tpg_np->tpg_np_parent = tpg_np_parent;
+		spin_lock(&tpg_np_parent->tpg_np_parent_lock);
+		list_add_tail(&tpg_np->tpg_np_child_list,
+			&tpg_np_parent->tpg_np_parent_list);
+		spin_unlock(&tpg_np_parent->tpg_np_parent_lock);
+	}
+
+	pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n",
+		tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+		(np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+	return tpg_np;
+}
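+/*
+ * The debug message above logs the portal in iSCSI TargetAddress style
+ * notation, e.g. "192.168.1.1:3260,1" for IP-address:port,TPGT.
+ */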
+
+static int iscsit_tpg_release_np(
+	struct iscsi_tpg_np *tpg_np,
+	struct iscsi_portal_group *tpg,
+	struct iscsi_np *np)
+{
+	iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+
+	pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n",
+		tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+		(np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+	tpg_np->tpg_np = NULL;
+	tpg_np->tpg = NULL;
+	kfree(tpg_np);
+	/*
+	 * iscsit_del_np() will shut down the struct iscsi_np once the
+	 * last TPG reference is released.
+	 */
+	return iscsit_del_np(np);
+}
+
+int iscsit_tpg_del_network_portal(
+	struct iscsi_portal_group *tpg,
+	struct iscsi_tpg_np *tpg_np)
+{
+	struct iscsi_np *np;
+	struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+	int ret = 0;
+
+	np = tpg_np->tpg_np;
+	if (!np) {
+		pr_err("Unable to locate struct iscsi_np from"
+				" struct iscsi_tpg_np\n");
+		return -EINVAL;
+	}
+
+	if (!tpg_np->tpg_np_parent) {
+		/*
+		 * We are the parent tpg network portal.  Release all of the
+		 * child tpg_np's (e.g. the non-ISCSI_TCP ones) on our parent
+		 * list first.
+		 */
+		list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+				&tpg_np->tpg_np_parent_list,
+				tpg_np_child_list) {
+			ret = iscsit_tpg_del_network_portal(tpg, tpg_np_child);
+			if (ret < 0)
+				pr_err("iscsit_tpg_del_network_portal()"
+					" failed: %d\n", ret);
+		}
+	} else {
+		/*
+		 * We are a child (non-parent) tpg network portal.  Remove
+		 * ourselves from the parent portal's child list.
+		 */
+		spin_lock(&tpg_np->tpg_np_parent->tpg_np_parent_lock);
+		list_del(&tpg_np->tpg_np_child_list);
+		spin_unlock(&tpg_np->tpg_np_parent->tpg_np_parent_lock);
+	}
+
+	spin_lock(&tpg->tpg_np_lock);
+	list_del(&tpg_np->tpg_np_list);
+	tpg->num_tpg_nps--;
+	if (tpg->tpg_tiqn)
+		tpg->tpg_tiqn->tiqn_num_tpg_nps--;
+	spin_unlock(&tpg->tpg_np_lock);
+
+	return iscsit_tpg_release_np(tpg_np, tpg, np);
+}
+
+int iscsit_tpg_set_initiator_node_queue_depth(
+	struct iscsi_portal_group *tpg,
+	unsigned char *initiatorname,
+	u32 queue_depth,
+	int force)
+{
+	return core_tpg_set_initiator_node_queue_depth(&tpg->tpg_se_tpg,
+		initiatorname, queue_depth, force);
+}
+
+int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication)
+{
+	unsigned char buf1[256], buf2[256], *none = NULL;
+	int len;
+	struct iscsi_param *param;
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((authentication != 1) && (authentication != 0)) {
+		pr_err("Illegal value for authentication parameter:"
+			" %u, ignoring request.\n", authentication);
+		return -1;
+	}
+
+	memset(buf1, 0, sizeof(buf1));
+	memset(buf2, 0, sizeof(buf2));
+
+	param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+	if (!param)
+		return -EINVAL;
+
+	if (authentication) {
+		snprintf(buf1, sizeof(buf1), "%s", param->value);
+		none = strstr(buf1, NONE);
+		if (!none)
+			goto out;
+		if (!strncmp(none + 4, ",", 1)) {
+			if (!strcmp(buf1, none))
+				sprintf(buf2, "%s", none+5);
+			else {
+				none--;
+				*none = '\0';
+				len = sprintf(buf2, "%s", buf1);
+				none += 5;
+				sprintf(buf2 + len, "%s", none);
+			}
+		} else {
+			none--;
+			*none = '\0';
+			sprintf(buf2, "%s", buf1);
+		}
+		if (iscsi_update_param_value(param, buf2) < 0)
+			return -EINVAL;
+	} else {
+		snprintf(buf1, sizeof(buf1), "%s", param->value);
+		none = strstr(buf1, NONE);
+		if ((none))
+			goto out;
+		strncat(buf1, ",", strlen(","));
+		strncat(buf1, NONE, strlen(NONE));
+		if (iscsi_update_param_value(param, buf1) < 0)
+			return -EINVAL;
+	}
+
+out:
+	a->authentication = authentication;
+	pr_debug("%s iSCSI Authentication Methods for TPG: %hu.\n",
+		a->authentication ? "Enforcing" : "Disabling", tpg->tpgt);
+
+	return 0;
+}
+
+int iscsit_ta_login_timeout(
+	struct iscsi_portal_group *tpg,
+	u32 login_timeout)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if (login_timeout > TA_LOGIN_TIMEOUT_MAX) {
+		pr_err("Requested Login Timeout %u larger than maximum"
+			" %u\n", login_timeout, TA_LOGIN_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (login_timeout < TA_LOGIN_TIMEOUT_MIN) {
+		pr_err("Requested Logout Timeout %u smaller than"
+			" minimum %u\n", login_timeout, TA_LOGIN_TIMEOUT_MIN);
+		return -EINVAL;
+	}
+
+	a->login_timeout = login_timeout;
+	pr_debug("Set Logout Timeout to %u for Target Portal Group"
+		" %hu\n", a->login_timeout, tpg->tpgt);
+
+	return 0;
+}
+
+int iscsit_ta_netif_timeout(
+	struct iscsi_portal_group *tpg,
+	u32 netif_timeout)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if (netif_timeout > TA_NETIF_TIMEOUT_MAX) {
+		pr_err("Requested Network Interface Timeout %u larger"
+			" than maximum %u\n", netif_timeout,
+				TA_NETIF_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (netif_timeout < TA_NETIF_TIMEOUT_MIN) {
+		pr_err("Requested Network Interface Timeout %u smaller"
+			" than minimum %u\n", netif_timeout,
+				TA_NETIF_TIMEOUT_MIN);
+		return -EINVAL;
+	}
+
+	a->netif_timeout = netif_timeout;
+	pr_debug("Set Network Interface Timeout to %u for"
+		" Target Portal Group %hu\n", a->netif_timeout, tpg->tpgt);
+
+	return 0;
+}
+
+int iscsit_ta_generate_node_acls(
+	struct iscsi_portal_group *tpg,
+	u32 flag)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((flag != 0) && (flag != 1)) {
+		pr_err("Illegal value %d\n", flag);
+		return -EINVAL;
+	}
+
+	a->generate_node_acls = flag;
+	pr_debug("iSCSI_TPG[%hu] - Generate Initiator Portal Group ACLs: %s\n",
+		tpg->tpgt, (a->generate_node_acls) ? "Enabled" : "Disabled");
+
+	return 0;
+}
+
+int iscsit_ta_default_cmdsn_depth(
+	struct iscsi_portal_group *tpg,
+	u32 tcq_depth)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if (tcq_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) {
+		pr_err("Requested Default Queue Depth: %u larger"
+			" than maximum %u\n", tcq_depth,
+				TA_DEFAULT_CMDSN_DEPTH_MAX);
+		return -EINVAL;
+	} else if (tcq_depth < TA_DEFAULT_CMDSN_DEPTH_MIN) {
+		pr_err("Requested Default Queue Depth: %u smaller"
+			" than minimum %u\n", tcq_depth,
+				TA_DEFAULT_CMDSN_DEPTH_MIN);
+		return -EINVAL;
+	}
+
+	a->default_cmdsn_depth = tcq_depth;
+	pr_debug("iSCSI_TPG[%hu] - Set Default CmdSN TCQ Depth to %u\n",
+		tpg->tpgt, a->default_cmdsn_depth);
+
+	return 0;
+}
+
+int iscsit_ta_cache_dynamic_acls(
+	struct iscsi_portal_group *tpg,
+	u32 flag)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((flag != 0) && (flag != 1)) {
+		pr_err("Illegal value %d\n", flag);
+		return -EINVAL;
+	}
+
+	a->cache_dynamic_acls = flag;
+	pr_debug("iSCSI_TPG[%hu] - Cache Dynamic Initiator Portal Group"
+		" ACLs %s\n", tpg->tpgt, (a->cache_dynamic_acls) ?
+		"Enabled" : "Disabled");
+
+	return 0;
+}
+
+int iscsit_ta_demo_mode_write_protect(
+	struct iscsi_portal_group *tpg,
+	u32 flag)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((flag != 0) && (flag != 1)) {
+		pr_err("Illegal value %d\n", flag);
+		return -EINVAL;
+	}
+
+	a->demo_mode_write_protect = flag;
+	pr_debug("iSCSI_TPG[%hu] - Demo Mode Write Protect bit: %s\n",
+		tpg->tpgt, (a->demo_mode_write_protect) ? "ON" : "OFF");
+
+	return 0;
+}
+
+int iscsit_ta_prod_mode_write_protect(
+	struct iscsi_portal_group *tpg,
+	u32 flag)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((flag != 0) && (flag != 1)) {
+		pr_err("Illegal value %d\n", flag);
+		return -EINVAL;
+	}
+
+	a->prod_mode_write_protect = flag;
+	pr_debug("iSCSI_TPG[%hu] - Production Mode Write Protect bit:"
+		" %s\n", tpg->tpgt, (a->prod_mode_write_protect) ?
+		"ON" : "OFF");
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
new file mode 100644
index 0000000..dda48c1
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tpg.h
@@ -0,0 +1,41 @@
+#ifndef ISCSI_TARGET_TPG_H
+#define ISCSI_TARGET_TPG_H
+
+extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, u16);
+extern int iscsit_load_discovery_tpg(void);
+extern void iscsit_release_discovery_tpg(void);
+extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *,
+			struct iscsi_np *);
+extern int iscsit_get_tpg(struct iscsi_portal_group *);
+extern void iscsit_put_tpg(struct iscsi_portal_group *);
+extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *);
+extern void iscsit_tpg_dump_params(struct iscsi_portal_group *);
+extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *);
+extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *,
+			int);
+extern int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *);
+extern int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *, int);
+extern struct iscsi_node_acl *iscsit_tpg_add_initiator_node_acl(
+			struct iscsi_portal_group *, const char *, u32);
+extern void iscsit_tpg_del_initiator_node_acl(struct iscsi_portal_group *,
+			struct se_node_acl *);
+extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session *);
+extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *);
+extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int);
+extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *,
+			struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *,
+			int);
+extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *,
+			struct iscsi_tpg_np *);
+extern int iscsit_tpg_set_initiator_node_queue_depth(struct iscsi_portal_group *,
+			unsigned char *, u32, int);
+extern int iscsit_ta_authentication(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_login_timeout(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_netif_timeout(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_generate_node_acls(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32);
+
+#endif /* ISCSI_TARGET_TPG_H */
diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c
new file mode 100644
index 0000000..0baac5b
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tq.c
@@ -0,0 +1,551 @@
+/*******************************************************************************
+ * This file contains the iSCSI Login Thread and Thread Queue functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target.h"
+
+static LIST_HEAD(active_ts_list);
+static LIST_HEAD(inactive_ts_list);
+static DEFINE_SPINLOCK(active_ts_lock);
+static DEFINE_SPINLOCK(inactive_ts_lock);
+static DEFINE_SPINLOCK(ts_bitmap_lock);
+
+static void iscsi_add_ts_to_active_list(struct iscsi_thread_set *ts)
+{
+	spin_lock(&active_ts_lock);
+	list_add_tail(&ts->ts_list, &active_ts_list);
+	iscsit_global->active_ts++;
+	spin_unlock(&active_ts_lock);
+}
+
+void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts)
+{
+	spin_lock(&inactive_ts_lock);
+	list_add_tail(&ts->ts_list, &inactive_ts_list);
+	iscsit_global->inactive_ts++;
+	spin_unlock(&inactive_ts_lock);
+}
+
+static void iscsi_del_ts_from_active_list(struct iscsi_thread_set *ts)
+{
+	spin_lock(&active_ts_lock);
+	list_del(&ts->ts_list);
+	iscsit_global->active_ts--;
+	spin_unlock(&active_ts_lock);
+}
+
+static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void)
+{
+	struct iscsi_thread_set *ts;
+
+	spin_lock(&inactive_ts_lock);
+	if (list_empty(&inactive_ts_list)) {
+		spin_unlock(&inactive_ts_lock);
+		return NULL;
+	}
+
+	list_for_each_entry(ts, &inactive_ts_list, ts_list)
+		break;
+
+	list_del(&ts->ts_list);
+	iscsit_global->inactive_ts--;
+	spin_unlock(&inactive_ts_lock);
+
+	return ts;
+}
+
+int iscsi_allocate_thread_sets(u32 thread_pair_count)
+{
+	int allocated_thread_pair_count = 0, i, thread_id;
+	struct iscsi_thread_set *ts = NULL;
+
+	for (i = 0; i < thread_pair_count; i++) {
+		ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL);
+		if (!ts) {
+			pr_err("Unable to allocate memory for"
+					" thread set.\n");
+			return allocated_thread_pair_count;
+		}
+		/*
+		 * Locate the next available region in the thread_set_bitmap
+		 */
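+		/* (get_order(1) == 0, so each thread set claims a single bit) */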
+		spin_lock(&ts_bitmap_lock);
+		thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
+				iscsit_global->ts_bitmap_count, get_order(1));
+		spin_unlock(&ts_bitmap_lock);
+		if (thread_id < 0) {
+			pr_err("bitmap_find_free_region() failed for"
+				" thread_set_bitmap\n");
+			kfree(ts);
+			return allocated_thread_pair_count;
+		}
+
+		ts->thread_id = thread_id;
+		ts->status = ISCSI_THREAD_SET_FREE;
+		INIT_LIST_HEAD(&ts->ts_list);
+		spin_lock_init(&ts->ts_state_lock);
+		init_completion(&ts->rx_post_start_comp);
+		init_completion(&ts->tx_post_start_comp);
+		init_completion(&ts->rx_restart_comp);
+		init_completion(&ts->tx_restart_comp);
+		init_completion(&ts->rx_start_comp);
+		init_completion(&ts->tx_start_comp);
+
+		ts->create_threads = 1;
+		ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
+					ISCSI_TX_THREAD_NAME);
+		if (IS_ERR(ts->tx_thread)) {
+			dump_stack();
+			pr_err("Unable to start iscsi_target_tx_thread\n");
+			break;
+		}
+
+		ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s",
+					ISCSI_RX_THREAD_NAME);
+		if (IS_ERR(ts->rx_thread)) {
+			kthread_stop(ts->tx_thread);
+			pr_err("Unable to start iscsi_target_rx_thread\n");
+			break;
+		}
+		ts->create_threads = 0;
+
+		iscsi_add_ts_to_inactive_list(ts);
+		allocated_thread_pair_count++;
+	}
+
+	pr_debug("Spawned %d thread set(s) (%d total threads).\n",
+		allocated_thread_pair_count, allocated_thread_pair_count * 2);
+	return allocated_thread_pair_count;
+}
+
+void iscsi_deallocate_thread_sets(void)
+{
+	u32 released_count = 0;
+	struct iscsi_thread_set *ts = NULL;
+
+	while ((ts = iscsi_get_ts_from_inactive_list())) {
+
+		spin_lock_bh(&ts->ts_state_lock);
+		ts->status = ISCSI_THREAD_SET_DIE;
+		spin_unlock_bh(&ts->ts_state_lock);
+
+		if (ts->rx_thread) {
+			send_sig(SIGINT, ts->rx_thread, 1);
+			kthread_stop(ts->rx_thread);
+		}
+		if (ts->tx_thread) {
+			send_sig(SIGINT, ts->tx_thread, 1);
+			kthread_stop(ts->tx_thread);
+		}
+		/*
+		 * Release this thread_id in the thread_set_bitmap
+		 */
+		spin_lock(&ts_bitmap_lock);
+		bitmap_release_region(iscsit_global->ts_bitmap,
+				ts->thread_id, get_order(1));
+		spin_unlock(&ts_bitmap_lock);
+
+		released_count++;
+		kfree(ts);
+	}
+
+	if (released_count)
+		pr_debug("Stopped %d thread set(s) (%d total threads)."
+			"\n", released_count, released_count * 2);
+}
+
+static void iscsi_deallocate_extra_thread_sets(void)
+{
+	u32 orig_count, released_count = 0;
+	struct iscsi_thread_set *ts = NULL;
+
+	orig_count = TARGET_THREAD_SET_COUNT;
+
+	while ((iscsit_global->inactive_ts + 1) > orig_count) {
+		ts = iscsi_get_ts_from_inactive_list();
+		if (!ts)
+			break;
+
+		spin_lock_bh(&ts->ts_state_lock);
+		ts->status = ISCSI_THREAD_SET_DIE;
+		spin_unlock_bh(&ts->ts_state_lock);
+
+		if (ts->rx_thread) {
+			send_sig(SIGINT, ts->rx_thread, 1);
+			kthread_stop(ts->rx_thread);
+		}
+		if (ts->tx_thread) {
+			send_sig(SIGINT, ts->tx_thread, 1);
+			kthread_stop(ts->tx_thread);
+		}
+		/*
+		 * Release this thread_id in the thread_set_bitmap
+		 */
+		spin_lock(&ts_bitmap_lock);
+		bitmap_release_region(iscsit_global->ts_bitmap,
+				ts->thread_id, get_order(1));
+		spin_unlock(&ts_bitmap_lock);
+
+		released_count++;
+		kfree(ts);
+	}
+
+	if (released_count) {
+		pr_debug("Stopped %d thread set(s) (%d total threads)."
+			"\n", released_count, released_count * 2);
+	}
+}
+
+void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
+{
+	iscsi_add_ts_to_active_list(ts);
+
+	spin_lock_bh(&ts->ts_state_lock);
+	conn->thread_set = ts;
+	ts->conn = conn;
+	spin_unlock_bh(&ts->ts_state_lock);
+	/*
+	 * Start up the RX thread and wait on rx_post_start_comp.  The RX
+	 * Thread will then do the same for the TX Thread in
+	 * iscsi_rx_thread_pre_handler().
+	 */
+	complete(&ts->rx_start_comp);
+	wait_for_completion(&ts->rx_post_start_comp);
+}
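+/*
+ * The resulting startup ordering, sketched for reference (the "activator"
+ * is the login path calling iscsi_activate_thread_set() above):
+ *
+ *	activator:  complete(rx_start_comp); wait(rx_post_start_comp)
+ *	RX thread:  wakes; complete(tx_start_comp); wait(tx_post_start_comp)
+ *	TX thread:  wakes; complete(tx_post_start_comp);
+ *	            complete(rx_post_start_comp)
+ *
+ * after which both the RX thread and the activator resume.
+ */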
+
+struct iscsi_thread_set *iscsi_get_thread_set(void)
+{
+	int allocate_ts = 0;
+	struct completion comp;
+	struct iscsi_thread_set *ts = NULL;
+	/*
+	 * If no inactive thread set is available on the first call to
+	 * iscsi_get_ts_from_inactive_list(), sleep for a second and
+	 * try again.  If still none are available after two attempts,
+	 * allocate a set ourselves.
+	 */
+get_set:
+	ts = iscsi_get_ts_from_inactive_list();
+	if (!ts) {
+		if (allocate_ts == 2)
+			iscsi_allocate_thread_sets(1);
+
+		init_completion(&comp);
+		wait_for_completion_timeout(&comp, 1 * HZ);
+
+		allocate_ts++;
+		goto get_set;
+	}
+
+	ts->delay_inactive = 1;
+	ts->signal_sent = 0;
+	ts->thread_count = 2;
+	init_completion(&ts->rx_restart_comp);
+	init_completion(&ts->tx_restart_comp);
+
+	return ts;
+}
+
+void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear)
+{
+	struct iscsi_thread_set *ts = NULL;
+
+	if (!conn->thread_set) {
+		pr_err("struct iscsi_conn->thread_set is NULL\n");
+		return;
+	}
+	ts = conn->thread_set;
+
+	spin_lock_bh(&ts->ts_state_lock);
+	ts->thread_clear &= ~thread_clear;
+
+	if ((thread_clear & ISCSI_CLEAR_RX_THREAD) &&
+	    (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD))
+		complete(&ts->rx_restart_comp);
+	else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) &&
+		 (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD))
+		complete(&ts->tx_restart_comp);
+	spin_unlock_bh(&ts->ts_state_lock);
+}
+
+void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent)
+{
+	struct iscsi_thread_set *ts = NULL;
+
+	if (!conn->thread_set) {
+		pr_err("struct iscsi_conn->thread_set is NULL\n");
+		return;
+	}
+	ts = conn->thread_set;
+
+	spin_lock_bh(&ts->ts_state_lock);
+	ts->signal_sent |= signal_sent;
+	spin_unlock_bh(&ts->ts_state_lock);
+}
+
+int iscsi_release_thread_set(struct iscsi_conn *conn)
+{
+	int thread_called = 0;
+	struct iscsi_thread_set *ts = NULL;
+
+	if (!conn || !conn->thread_set) {
+		pr_err("connection or thread set pointer is NULL\n");
+		BUG();
+	}
+	ts = conn->thread_set;
+
+	spin_lock_bh(&ts->ts_state_lock);
+	ts->status = ISCSI_THREAD_SET_RESET;
+
+	if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME,
+			strlen(ISCSI_RX_THREAD_NAME)))
+		thread_called = ISCSI_RX_THREAD;
+	else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME,
+			strlen(ISCSI_TX_THREAD_NAME)))
+		thread_called = ISCSI_TX_THREAD;
+
+	if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) &&
+	   (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) {
+
+		if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) {
+			send_sig(SIGINT, ts->rx_thread, 1);
+			ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
+		}
+		ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD;
+		spin_unlock_bh(&ts->ts_state_lock);
+		wait_for_completion(&ts->rx_restart_comp);
+		spin_lock_bh(&ts->ts_state_lock);
+		ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD;
+	}
+	if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) &&
+	   (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) {
+
+		if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) {
+			send_sig(SIGINT, ts->tx_thread, 1);
+			ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
+		}
+		ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD;
+		spin_unlock_bh(&ts->ts_state_lock);
+		wait_for_completion(&ts->tx_restart_comp);
+		spin_lock_bh(&ts->ts_state_lock);
+		ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD;
+	}
+
+	ts->conn = NULL;
+	ts->status = ISCSI_THREAD_SET_FREE;
+	spin_unlock_bh(&ts->ts_state_lock);
+
+	return 0;
+}
+
+int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn)
+{
+	struct iscsi_thread_set *ts;
+
+	if (!conn->thread_set)
+		return -1;
+	ts = conn->thread_set;
+
+	spin_lock_bh(&ts->ts_state_lock);
+	if (ts->status != ISCSI_THREAD_SET_ACTIVE) {
+		spin_unlock_bh(&ts->ts_state_lock);
+		return -1;
+	}
+
+	if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) {
+		send_sig(SIGINT, ts->tx_thread, 1);
+		ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
+	}
+	if (ts->rx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD))) {
+		send_sig(SIGINT, ts->rx_thread, 1);
+		ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
+	}
+	spin_unlock_bh(&ts->ts_state_lock);
+
+	return 0;
+}
+
+static void iscsi_check_to_add_additional_sets(void)
+{
+	int thread_sets_add;
+
+	spin_lock(&inactive_ts_lock);
+	thread_sets_add = iscsit_global->inactive_ts;
+	spin_unlock(&inactive_ts_lock);
+	if (thread_sets_add == 1)
+		iscsi_allocate_thread_sets(1);
+}
+
+static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+	spin_lock_bh(&ts->ts_state_lock);
+	if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) {
+		spin_unlock_bh(&ts->ts_state_lock);
+		return -1;
+	}
+	spin_unlock_bh(&ts->ts_state_lock);
+
+	return 0;
+}
+
+struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+	int ret;
+
+	spin_lock_bh(&ts->ts_state_lock);
+	if (ts->create_threads) {
+		spin_unlock_bh(&ts->ts_state_lock);
+		goto sleep;
+	}
+
+	flush_signals(current);
+
+	if (ts->delay_inactive && (--ts->thread_count == 0)) {
+		spin_unlock_bh(&ts->ts_state_lock);
+		iscsi_del_ts_from_active_list(ts);
+
+		if (!iscsit_global->in_shutdown)
+			iscsi_deallocate_extra_thread_sets();
+
+		iscsi_add_ts_to_inactive_list(ts);
+		spin_lock_bh(&ts->ts_state_lock);
+	}
+
+	if ((ts->status == ISCSI_THREAD_SET_RESET) &&
+	    (ts->thread_clear & ISCSI_CLEAR_RX_THREAD))
+		complete(&ts->rx_restart_comp);
+
+	ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD;
+	spin_unlock_bh(&ts->ts_state_lock);
+sleep:
+	ret = wait_for_completion_interruptible(&ts->rx_start_comp);
+	if (ret != 0)
+		return NULL;
+
+	if (iscsi_signal_thread_pre_handler(ts) < 0)
+		return NULL;
+
+	if (!ts->conn) {
+		pr_err("struct iscsi_thread_set->conn is NULL for"
+			" thread_id: %d, going back to sleep\n", ts->thread_id);
+		goto sleep;
+	}
+	iscsi_check_to_add_additional_sets();
+	/*
+	 * The RX Thread starts up the TX Thread and sleeps.
+	 */
+	ts->thread_clear |= ISCSI_CLEAR_RX_THREAD;
+	complete(&ts->tx_start_comp);
+	wait_for_completion(&ts->tx_post_start_comp);
+
+	return ts->conn;
+}
+
+struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+	int ret;
+
+	spin_lock_bh(&ts->ts_state_lock);
+	if (ts->create_threads) {
+		spin_unlock_bh(&ts->ts_state_lock);
+		goto sleep;
+	}
+
+	flush_signals(current);
+
+	if (ts->delay_inactive && (--ts->thread_count == 0)) {
+		spin_unlock_bh(&ts->ts_state_lock);
+		iscsi_del_ts_from_active_list(ts);
+
+		if (!iscsit_global->in_shutdown)
+			iscsi_deallocate_extra_thread_sets();
+
+		iscsi_add_ts_to_inactive_list(ts);
+		spin_lock_bh(&ts->ts_state_lock);
+	}
+	if ((ts->status == ISCSI_THREAD_SET_RESET) &&
+	    (ts->thread_clear & ISCSI_CLEAR_TX_THREAD))
+		complete(&ts->tx_restart_comp);
+
+	ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD;
+	spin_unlock_bh(&ts->ts_state_lock);
+sleep:
+	ret = wait_for_completion_interruptible(&ts->tx_start_comp);
+	if (ret != 0)
+		return NULL;
+
+	if (iscsi_signal_thread_pre_handler(ts) < 0)
+		return NULL;
+
+	if (!ts->conn) {
+		pr_err("struct iscsi_thread_set->conn is NULL for "
+			" thread_id: %d, going back to sleep\n",
+			ts->thread_id);
+		goto sleep;
+	}
+
+	iscsi_check_to_add_additional_sets();
+	/*
+	 * From the TX thread, up the tx_post_start_comp that the RX Thread is
+	 * sleeping on in iscsi_rx_thread_pre_handler(), then up the
+	 * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on.
+	 */
+	ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
+	complete(&ts->tx_post_start_comp);
+	complete(&ts->rx_post_start_comp);
+
+	spin_lock_bh(&ts->ts_state_lock);
+	ts->status = ISCSI_THREAD_SET_ACTIVE;
+	spin_unlock_bh(&ts->ts_state_lock);
+
+	return ts->conn;
+}
+
+int iscsi_thread_set_init(void)
+{
+	int size;
+
+	iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS;
+
+	size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long);
+	iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL);
+	if (!iscsit_global->ts_bitmap) {
+		pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&active_ts_lock);
+	spin_lock_init(&inactive_ts_lock);
+	spin_lock_init(&ts_bitmap_lock);
+	INIT_LIST_HEAD(&active_ts_list);
+	INIT_LIST_HEAD(&inactive_ts_list);
+
+	return 0;
+}
+
+void iscsi_thread_set_free(void)
+{
+	kfree(iscsit_global->ts_bitmap);
+}
diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h
new file mode 100644
index 0000000..26e6a95
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tq.h
@@ -0,0 +1,88 @@
+#ifndef ISCSI_THREAD_QUEUE_H
+#define ISCSI_THREAD_QUEUE_H
+
+/*
+ * Defines for thread sets.
+ */
+extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *);
+extern void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *);
+extern int iscsi_allocate_thread_sets(u32);
+extern void iscsi_deallocate_thread_sets(void);
+extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *);
+extern struct iscsi_thread_set *iscsi_get_thread_set(void);
+extern void iscsi_set_thread_clear(struct iscsi_conn *, u8);
+extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8);
+extern int iscsi_release_thread_set(struct iscsi_conn *);
+extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *);
+extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *);
+extern int iscsi_thread_set_init(void);
+extern void iscsi_thread_set_free(void);
+
+extern int iscsi_target_tx_thread(void *);
+extern int iscsi_target_rx_thread(void *);
+
+#define TARGET_THREAD_SET_COUNT			4
+
+#define ISCSI_RX_THREAD                         1
+#define ISCSI_TX_THREAD                         2
+#define ISCSI_RX_THREAD_NAME			"iscsi_trx"
+#define ISCSI_TX_THREAD_NAME			"iscsi_ttx"
+#define ISCSI_BLOCK_RX_THREAD			0x1
+#define ISCSI_BLOCK_TX_THREAD			0x2
+#define ISCSI_CLEAR_RX_THREAD			0x1
+#define ISCSI_CLEAR_TX_THREAD			0x2
+#define ISCSI_SIGNAL_RX_THREAD			0x1
+#define ISCSI_SIGNAL_TX_THREAD			0x2
+
+/* struct iscsi_thread_set->status */
+#define ISCSI_THREAD_SET_FREE			1
+#define ISCSI_THREAD_SET_ACTIVE			2
+#define ISCSI_THREAD_SET_DIE			3
+#define ISCSI_THREAD_SET_RESET			4
+#define ISCSI_THREAD_SET_DEALLOCATE_THREADS	5
+
+/* By default allow a maximum of 32K iSCSI connections */
+#define ISCSI_TS_BITMAP_BITS			32768
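+/*
+ * For scale: iscsi_thread_set_init() sizes this bitmap as
+ * BITS_TO_LONGS(32768) * sizeof(long) = 512 * 8 bytes on 64-bit builds
+ * (1024 * 4 on 32-bit), i.e. 4096 bytes either way.
+ */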
+
+struct iscsi_thread_set {
+	/* flags used for blocking and restarting sets */
+	int	blocked_threads;
+	/* flag for creating threads */
+	int	create_threads;
+	/* flag for delaying re-adding to the inactive list */
+	int	delay_inactive;
+	/* status for thread set */
+	int	status;
+	/* which threads have had signals sent */
+	int	signal_sent;
+	/* flag for which threads exited first */
+	int	thread_clear;
+	/* Active threads in the thread set */
+	int	thread_count;
+	/* Unique thread ID */
+	u32	thread_id;
+	/* pointer to connection if set is active */
+	struct iscsi_conn	*conn;
+	/* used for controlling ts state accesses */
+	spinlock_t	ts_state_lock;
+	/* Used for rx side post startup */
+	struct completion	rx_post_start_comp;
+	/* Used for tx side post startup */
+	struct completion	tx_post_start_comp;
+	/* used for restarting thread queue */
+	struct completion	rx_restart_comp;
+	/* used for restarting thread queue */
+	struct completion	tx_restart_comp;
+	/* used to block an idle RX thread until activated */
+	struct completion	rx_start_comp;
+	/* used to block an idle TX thread until activated */
+	struct completion	tx_start_comp;
+	/* OS descriptor for rx thread */
+	struct task_struct	*rx_thread;
+	/* OS descriptor for tx thread */
+	struct task_struct	*tx_thread;
+	/* struct iscsi_thread_set list head */
+	struct list_head	ts_list;
+};
+
+#endif   /*** ISCSI_THREAD_QUEUE_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
new file mode 100644
index 0000000..a1acb01
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -0,0 +1,1819 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific utility functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+#define PRINT_BUFF(buff, len)					\
+{								\
+	int zzz;						\
+								\
+	pr_debug("%d:\n", __LINE__);				\
+	for (zzz = 0; zzz < len; zzz++) {			\
+		if (zzz % 16 == 0) {				\
+			if (zzz)				\
+				pr_debug("\n");			\
+			pr_debug("%4i: ", zzz);			\
+		}						\
+		pr_debug("%02x ", (unsigned char) (buff)[zzz]);	\
+	}							\
+	if ((len + 1) % 16)					\
+		pr_debug("\n");					\
+}
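+/*
+ * Hypothetical usage of the macro above, hex-dumping a 48-byte basic
+ * header segment sixteen bytes per line:
+ *
+ *	unsigned char hdr[48];
+ *	...
+ *	PRINT_BUFF(hdr, 48);
+ */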
+
+extern struct list_head g_tiqn_list;
+extern spinlock_t tiqn_lock;
+
+/*
+ *	Called with cmd->r2t_lock held.
+ */
+int iscsit_add_r2t_to_list(
+	struct iscsi_cmd *cmd,
+	u32 offset,
+	u32 xfer_len,
+	int recovery,
+	u32 r2t_sn)
+{
+	struct iscsi_r2t *r2t;
+
+	r2t = kmem_cache_zalloc(lio_r2t_cache, GFP_ATOMIC);
+	if (!r2t) {
+		pr_err("Unable to allocate memory for struct iscsi_r2t.\n");
+		return -1;
+	}
+	INIT_LIST_HEAD(&r2t->r2t_list);
+
+	r2t->recovery_r2t = recovery;
+	r2t->r2t_sn = (!r2t_sn) ? cmd->r2t_sn++ : r2t_sn;
+	r2t->offset = offset;
+	r2t->xfer_len = xfer_len;
+	list_add_tail(&r2t->r2t_list, &cmd->cmd_r2t_list);
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T);
+
+	spin_lock_bh(&cmd->r2t_lock);
+	return 0;
+}
+
+struct iscsi_r2t *iscsit_get_r2t_for_eos(
+	struct iscsi_cmd *cmd,
+	u32 offset,
+	u32 length)
+{
+	struct iscsi_r2t *r2t;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+		if ((r2t->offset <= offset) &&
+		    (r2t->offset + r2t->xfer_len) >= (offset + length)) {
+			spin_unlock_bh(&cmd->r2t_lock);
+			return r2t;
+		}
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	pr_err("Unable to locate R2T for Offset: %u, Length:"
+			" %u\n", offset, length);
+	return NULL;
+}
+
+struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *cmd)
+{
+	struct iscsi_r2t *r2t;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+		if (!r2t->sent_r2t) {
+			spin_unlock_bh(&cmd->r2t_lock);
+			return r2t;
+		}
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	pr_err("Unable to locate next R2T to send for ITT:"
+			" 0x%08x.\n", cmd->init_task_tag);
+	return NULL;
+}
+
+/*
+ *	Called with cmd->r2t_lock held.
+ */
+void iscsit_free_r2t(struct iscsi_r2t *r2t, struct iscsi_cmd *cmd)
+{
+	list_del(&r2t->r2t_list);
+	kmem_cache_free(lio_r2t_cache, r2t);
+}
+
+void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
+{
+	struct iscsi_r2t *r2t, *r2t_tmp;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list)
+		iscsit_free_r2t(r2t, cmd);
+	spin_unlock_bh(&cmd->r2t_lock);
+}
+
+/*
+ * May be called from software interrupt (timer) context for allocating
+ * iSCSI NopINs.
+ */
+struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask)
+{
+	struct iscsi_cmd *cmd;
+
+	cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask);
+	if (!cmd) {
+		pr_err("Unable to allocate memory for struct iscsi_cmd.\n");
+		return NULL;
+	}
+
+	cmd->conn	= conn;
+	INIT_LIST_HEAD(&cmd->i_list);
+	INIT_LIST_HEAD(&cmd->datain_list);
+	INIT_LIST_HEAD(&cmd->cmd_r2t_list);
+	init_completion(&cmd->reject_comp);
+	spin_lock_init(&cmd->datain_lock);
+	spin_lock_init(&cmd->dataout_timeout_lock);
+	spin_lock_init(&cmd->istate_lock);
+	spin_lock_init(&cmd->error_lock);
+	spin_lock_init(&cmd->r2t_lock);
+
+	return cmd;
+}
+
+/*
+ * Called from iscsi_handle_scsi_cmd()
+ */
+struct iscsi_cmd *iscsit_allocate_se_cmd(
+	struct iscsi_conn *conn,
+	u32 data_length,
+	int data_direction,
+	int iscsi_task_attr)
+{
+	struct iscsi_cmd *cmd;
+	struct se_cmd *se_cmd;
+	int sam_task_attr;
+
+	cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+	if (!cmd)
+		return NULL;
+
+	cmd->data_direction = data_direction;
+	cmd->data_length = data_length;
+	/*
+	 * Figure out the SAM Task Attribute for the incoming SCSI CDB
+	 */
+	if ((iscsi_task_attr == ISCSI_ATTR_UNTAGGED) ||
+	    (iscsi_task_attr == ISCSI_ATTR_SIMPLE))
+		sam_task_attr = MSG_SIMPLE_TAG;
+	else if (iscsi_task_attr == ISCSI_ATTR_ORDERED)
+		sam_task_attr = MSG_ORDERED_TAG;
+	else if (iscsi_task_attr == ISCSI_ATTR_HEAD_OF_QUEUE)
+		sam_task_attr = MSG_HEAD_TAG;
+	else if (iscsi_task_attr == ISCSI_ATTR_ACA)
+		sam_task_attr = MSG_ACA_TAG;
+	else {
+		pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using"
+			" MSG_SIMPLE_TAG\n", iscsi_task_attr);
+		sam_task_attr = MSG_SIMPLE_TAG;
+	}
+
+	se_cmd = &cmd->se_cmd;
+	/*
+	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+	 */
+	transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops,
+			conn->sess->se_sess, data_length, data_direction,
+			sam_task_attr, &cmd->sense_buffer[0]);
+	return cmd;
+}
+
+struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
+	struct iscsi_conn *conn,
+	u8 function)
+{
+	struct iscsi_cmd *cmd;
+	struct se_cmd *se_cmd;
+	u8 tcm_function;
+
+	cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+	if (!cmd)
+		return NULL;
+
+	cmd->data_direction = DMA_NONE;
+
+	cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL);
+	if (!cmd->tmr_req) {
+		pr_err("Unable to allocate memory for"
+			" Task Management command!\n");
+		return NULL;
+	}
+	/*
+	 * TASK_REASSIGN for ERL=2 / connection stays inside of
+	 * LIO-Target $FABRIC_MOD
+	 */
+	if (function == ISCSI_TM_FUNC_TASK_REASSIGN)
+		return cmd;
+
+	se_cmd = &cmd->se_cmd;
+	/*
+	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+	 */
+	transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops,
+				conn->sess->se_sess, 0, DMA_NONE,
+				MSG_SIMPLE_TAG, &cmd->sense_buffer[0]);
+
+	switch (function) {
+	case ISCSI_TM_FUNC_ABORT_TASK:
+		tcm_function = TMR_ABORT_TASK;
+		break;
+	case ISCSI_TM_FUNC_ABORT_TASK_SET:
+		tcm_function = TMR_ABORT_TASK_SET;
+		break;
+	case ISCSI_TM_FUNC_CLEAR_ACA:
+		tcm_function = TMR_CLEAR_ACA;
+		break;
+	case ISCSI_TM_FUNC_CLEAR_TASK_SET:
+		tcm_function = TMR_CLEAR_TASK_SET;
+		break;
+	case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+		tcm_function = TMR_LUN_RESET;
+		break;
+	case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+		tcm_function = TMR_TARGET_WARM_RESET;
+		break;
+	case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+		tcm_function = TMR_TARGET_COLD_RESET;
+		break;
+	default:
+		pr_err("Unknown iSCSI TMR Function:"
+			" 0x%02x\n", function);
+		goto out;
+	}
+
+	se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd,
+				(void *)cmd->tmr_req, tcm_function);
+	if (!se_cmd->se_tmr_req)
+		goto out;
+
+	cmd->tmr_req->se_tmr_req = se_cmd->se_tmr_req;
+
+	return cmd;
+out:
+	iscsit_release_cmd(cmd);
+	if (se_cmd)
+		transport_free_se_cmd(se_cmd);
+	return NULL;
+}
+
+int iscsit_decide_list_to_build(
+	struct iscsi_cmd *cmd,
+	u32 immediate_data_length)
+{
+	struct iscsi_build_list bl;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na;
+
+	if (sess->sess_ops->DataSequenceInOrder &&
+	    sess->sess_ops->DataPDUInOrder)
+		return 0;
+
+	if (cmd->data_direction == DMA_NONE)
+		return 0;
+
+	na = iscsit_tpg_get_node_attrib(sess);
+	memset(&bl, 0, sizeof(struct iscsi_build_list));
+
+	if (cmd->data_direction == DMA_FROM_DEVICE) {
+		bl.data_direction = ISCSI_PDU_READ;
+		bl.type = PDULIST_NORMAL;
+		if (na->random_datain_pdu_offsets)
+			bl.randomize |= RANDOM_DATAIN_PDU_OFFSETS;
+		if (na->random_datain_seq_offsets)
+			bl.randomize |= RANDOM_DATAIN_SEQ_OFFSETS;
+	} else {
+		bl.data_direction = ISCSI_PDU_WRITE;
+		bl.immediate_data_length = immediate_data_length;
+		if (na->random_r2t_offsets)
+			bl.randomize |= RANDOM_R2T_OFFSETS;
+
+		if (!cmd->immediate_data && !cmd->unsolicited_data)
+			bl.type = PDULIST_NORMAL;
+		else if (cmd->immediate_data && !cmd->unsolicited_data)
+			bl.type = PDULIST_IMMEDIATE;
+		else if (!cmd->immediate_data && cmd->unsolicited_data)
+			bl.type = PDULIST_UNSOLICITED;
+		else if (cmd->immediate_data && cmd->unsolicited_data)
+			bl.type = PDULIST_IMMEDIATE_AND_UNSOLICITED;
+	}
+
+	return iscsit_do_build_list(cmd, &bl);
+}
+
+struct iscsi_seq *iscsit_get_seq_holder_for_datain(
+	struct iscsi_cmd *cmd,
+	u32 seq_send_order)
+{
+	u32 i;
+
+	for (i = 0; i < cmd->seq_count; i++)
+		if (cmd->seq_list[i].seq_send_order == seq_send_order)
+			return &cmd->seq_list[i];
+
+	return NULL;
+}
+
+struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *cmd)
+{
+	u32 i;
+
+	if (!cmd->seq_list) {
+		pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+		return NULL;
+	}
+
+	for (i = 0; i < cmd->seq_count; i++) {
+		if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+			continue;
+		if (cmd->seq_list[i].seq_send_order == cmd->seq_send_order) {
+			cmd->seq_send_order++;
+			return &cmd->seq_list[i];
+		}
+	}
+
+	return NULL;
+}
+
+struct iscsi_r2t *iscsit_get_holder_for_r2tsn(
+	struct iscsi_cmd *cmd,
+	u32 r2t_sn)
+{
+	struct iscsi_r2t *r2t;
+
+	spin_lock_bh(&cmd->r2t_lock);
+	list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+		if (r2t->r2t_sn == r2t_sn) {
+			spin_unlock_bh(&cmd->r2t_lock);
+			return r2t;
+		}
+	}
+	spin_unlock_bh(&cmd->r2t_lock);
+
+	return NULL;
+}
+
+static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn)
+{
+	int ret;
+
+	/*
+	 * This is the proper method of checking received CmdSN against
+	 * ExpCmdSN and MaxCmdSN values, as well as accounting for out
+	 * of order CmdSNs due to multiple connection sessions and/or
+	 * CRC failures.
+	 */
+	if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) {
+		pr_err("Received CmdSN: 0x%08x is greater than"
+		       " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn,
+		       sess->max_cmd_sn);
+		ret = CMDSN_ERROR_CANNOT_RECOVER;
+
+	} else if (cmdsn == sess->exp_cmd_sn) {
+		sess->exp_cmd_sn++;
+		pr_debug("Received CmdSN matches ExpCmdSN,"
+		      " incremented ExpCmdSN to: 0x%08x\n",
+		      sess->exp_cmd_sn);
+		ret = CMDSN_NORMAL_OPERATION;
+
+	} else if (iscsi_sna_gt(cmdsn, sess->exp_cmd_sn)) {
+		pr_debug("Received CmdSN: 0x%08x is greater"
+		      " than ExpCmdSN: 0x%08x, not acknowledging.\n",
+		      cmdsn, sess->exp_cmd_sn);
+		ret = CMDSN_HIGHER_THAN_EXP;
+
+	} else {
+		pr_err("Received CmdSN: 0x%08x is less than"
+		       " ExpCmdSN: 0x%08x, ignoring.\n", cmdsn,
+		       sess->exp_cmd_sn);
+		ret = CMDSN_LOWER_THAN_EXP;
+	}
+
+	return ret;
+}
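+/*
+ * iscsi_sna_gt() above implements 32-bit serial number arithmetic
+ * (RFC 1982) for CmdSN comparison.  A minimal sketch of the usual
+ * signed-difference form it is assumed to take (illustrative only):
+ *
+ *	static inline int sna_gt(u32 a, u32 b)
+ *	{
+ *		return (s32)(a - b) > 0;
+ *	}
+ *
+ * e.g. sna_gt(0x00000001, 0xfffffffe) is true across the wrap.
+ */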
+
+/*
+ * Commands may be received out of order if MC/S is in use.
+ * Ensure they are executed in CmdSN order.
+ */
+int iscsit_sequence_cmd(
+	struct iscsi_conn *conn,
+	struct iscsi_cmd *cmd,
+	u32 cmdsn)
+{
+	int ret;
+	int cmdsn_ret;
+
+	mutex_lock(&conn->sess->cmdsn_mutex);
+
+	cmdsn_ret = iscsit_check_received_cmdsn(conn->sess, cmdsn);
+	switch (cmdsn_ret) {
+	case CMDSN_NORMAL_OPERATION:
+		ret = iscsit_execute_cmd(cmd, 0);
+		if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list))
+			iscsit_execute_ooo_cmdsns(conn->sess);
+		break;
+	case CMDSN_HIGHER_THAN_EXP:
+		ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, cmdsn);
+		break;
+	case CMDSN_LOWER_THAN_EXP:
+		cmd->i_state = ISTATE_REMOVE;
+		iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+		ret = cmdsn_ret;
+		break;
+	default:
+		ret = cmdsn_ret;
+		break;
+	}
+	mutex_unlock(&conn->sess->cmdsn_mutex);
+
+	return ret;
+}
+
+int iscsit_check_unsolicited_dataout(struct iscsi_cmd *cmd, unsigned char *buf)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	if (conn->sess->sess_ops->InitialR2T) {
+		pr_err("Received unexpected unsolicited data"
+			" while InitialR2T=Yes, protocol error.\n");
+		transport_send_check_condition_and_sense(se_cmd,
+				TCM_UNEXPECTED_UNSOLICITED_DATA, 0);
+		return -1;
+	}
+
+	if ((cmd->first_burst_len + payload_length) >
+	     conn->sess->sess_ops->FirstBurstLength) {
+		pr_err("Total %u bytes exceeds FirstBurstLength: %u"
+			" for this Unsolicited DataOut Burst.\n",
+			(cmd->first_burst_len + payload_length),
+				conn->sess->sess_ops->FirstBurstLength);
+		transport_send_check_condition_and_sense(se_cmd,
+				TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+		return -1;
+	}
+
+	if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))
+		return 0;
+
+	if (((cmd->first_burst_len + payload_length) != cmd->data_length) &&
+	    ((cmd->first_burst_len + payload_length) !=
+	      conn->sess->sess_ops->FirstBurstLength)) {
+		pr_err("Unsolicited non-immediate data received %u"
+			" does not equal FirstBurstLength: %u, and does"
+			" not equal ExpXferLen %u.\n",
+			(cmd->first_burst_len + payload_length),
+			conn->sess->sess_ops->FirstBurstLength, cmd->data_length);
+		transport_send_check_condition_and_sense(se_cmd,
+				TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+		return -1;
+	}
+	return 0;
+}
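+/*
+ * Worked example of the final-PDU check above: with FirstBurstLength=65536
+ * and cmd->data_length=131072, an unsolicited burst may total at most
+ * 65536 bytes, and the F=1 DataOut must make first_burst_len + payload
+ * exactly 65536; only transfers shorter than FirstBurstLength may instead
+ * end at cmd->data_length.  Anything else fails with a CHECK_CONDITION.
+ */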
+
+struct iscsi_cmd *iscsit_find_cmd_from_itt(
+	struct iscsi_conn *conn,
+	u32 init_task_tag)
+{
+	struct iscsi_cmd *cmd;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+		if (cmd->init_task_tag == init_task_tag) {
+			spin_unlock_bh(&conn->cmd_lock);
+			return cmd;
+		}
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+
+	pr_err("Unable to locate ITT: 0x%08x on CID: %hu",
+			init_task_tag, conn->cid);
+	return NULL;
+}
+
+struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(
+	struct iscsi_conn *conn,
+	u32 init_task_tag,
+	u32 length)
+{
+	struct iscsi_cmd *cmd;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+		if (cmd->init_task_tag == init_task_tag) {
+			spin_unlock_bh(&conn->cmd_lock);
+			return cmd;
+		}
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+
+	pr_err("Unable to locate ITT: 0x%08x on CID: %hu,"
+			" dumping payload\n", init_task_tag, conn->cid);
+	if (length)
+		iscsit_dump_data_payload(conn, length, 1);
+
+	return NULL;
+}
+
+struct iscsi_cmd *iscsit_find_cmd_from_ttt(
+	struct iscsi_conn *conn,
+	u32 targ_xfer_tag)
+{
+	struct iscsi_cmd *cmd = NULL;
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+		if (cmd->targ_xfer_tag == targ_xfer_tag) {
+			spin_unlock_bh(&conn->cmd_lock);
+			return cmd;
+		}
+	}
+	spin_unlock_bh(&conn->cmd_lock);
+
+	pr_err("Unable to locate TTT: 0x%08x on CID: %hu\n",
+			targ_xfer_tag, conn->cid);
+	return NULL;
+}
+
+int iscsit_find_cmd_for_recovery(
+	struct iscsi_session *sess,
+	struct iscsi_cmd **cmd_ptr,
+	struct iscsi_conn_recovery **cr_ptr,
+	u32 init_task_tag)
+{
+	struct iscsi_cmd *cmd = NULL;
+	struct iscsi_conn_recovery *cr;
+	/*
+	 * Scan through the inactive connection recovery list's command list.
+	 * If init_task_tag matches, the command is still sitting on an
+	 * inactive connection recovery entry.
+	 */
+	spin_lock(&sess->cr_i_lock);
+	list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+		spin_lock(&cr->conn_recovery_cmd_lock);
+		list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+			if (cmd->init_task_tag == init_task_tag) {
+				spin_unlock(&cr->conn_recovery_cmd_lock);
+				spin_unlock(&sess->cr_i_lock);
+
+				*cr_ptr = cr;
+				*cmd_ptr = cmd;
+				return -2;
+			}
+		}
+		spin_unlock(&cr->conn_recovery_cmd_lock);
+	}
+	spin_unlock(&sess->cr_i_lock);
+	/*
+	 * Scan through the active connection recovery list's command list.
+	 * If init_task_tag matches, the command is ready to be reassigned.
+	 */
+	spin_lock(&sess->cr_a_lock);
+	list_for_each_entry(cr, &sess->cr_active_list, cr_list) {
+		spin_lock(&cr->conn_recovery_cmd_lock);
+		list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+			if (cmd->init_task_tag == init_task_tag) {
+				spin_unlock(&cr->conn_recovery_cmd_lock);
+				spin_unlock(&sess->cr_a_lock);
+
+				*cr_ptr = cr;
+				*cmd_ptr = cmd;
+				return 0;
+			}
+		}
+		spin_unlock(&cr->conn_recovery_cmd_lock);
+	}
+	spin_unlock(&sess->cr_a_lock);
+
+	return -1;
+}
+
+void iscsit_add_cmd_to_immediate_queue(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	u8 state)
+{
+	struct iscsi_queue_req *qr;
+
+	qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+	if (!qr) {
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_queue_req\n");
+		return;
+	}
+	INIT_LIST_HEAD(&qr->qr_list);
+	qr->cmd = cmd;
+	qr->state = state;
+
+	spin_lock_bh(&conn->immed_queue_lock);
+	list_add_tail(&qr->qr_list, &conn->immed_queue_list);
+	atomic_inc(&cmd->immed_queue_count);
+	atomic_set(&conn->check_immediate_queue, 1);
+	spin_unlock_bh(&conn->immed_queue_lock);
+
+	wake_up_process(conn->thread_set->tx_thread);
+}
+
+struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn)
+{
+	struct iscsi_queue_req *qr;
+
+	spin_lock_bh(&conn->immed_queue_lock);
+	if (list_empty(&conn->immed_queue_list)) {
+		spin_unlock_bh(&conn->immed_queue_lock);
+		return NULL;
+	}
+	list_for_each_entry(qr, &conn->immed_queue_list, qr_list)
+		break;
+
+	list_del(&qr->qr_list);
+	if (qr->cmd)
+		atomic_dec(&qr->cmd->immed_queue_count);
+	spin_unlock_bh(&conn->immed_queue_lock);
+
+	return qr;
+}
+
+static void iscsit_remove_cmd_from_immediate_queue(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_queue_req *qr, *qr_tmp;
+
+	spin_lock_bh(&conn->immed_queue_lock);
+	if (!atomic_read(&cmd->immed_queue_count)) {
+		spin_unlock_bh(&conn->immed_queue_lock);
+		return;
+	}
+
+	list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+		if (qr->cmd != cmd)
+			continue;
+
+		atomic_dec(&qr->cmd->immed_queue_count);
+		list_del(&qr->qr_list);
+		kmem_cache_free(lio_qr_cache, qr);
+	}
+	spin_unlock_bh(&conn->immed_queue_lock);
+
+	if (atomic_read(&cmd->immed_queue_count)) {
+		pr_err("ITT: 0x%08x immed_queue_count: %d\n",
+			cmd->init_task_tag,
+			atomic_read(&cmd->immed_queue_count));
+	}
+}
+
+void iscsit_add_cmd_to_response_queue(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	u8 state)
+{
+	struct iscsi_queue_req *qr;
+
+	qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+	if (!qr) {
+		pr_err("Unable to allocate memory for"
+			" struct iscsi_queue_req\n");
+		return;
+	}
+	INIT_LIST_HEAD(&qr->qr_list);
+	qr->cmd = cmd;
+	qr->state = state;
+
+	spin_lock_bh(&conn->response_queue_lock);
+	list_add_tail(&qr->qr_list, &conn->response_queue_list);
+	atomic_inc(&cmd->response_queue_count);
+	spin_unlock_bh(&conn->response_queue_lock);
+
+	wake_up_process(conn->thread_set->tx_thread);
+}
+
+struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn)
+{
+	struct iscsi_queue_req *qr;
+
+	spin_lock_bh(&conn->response_queue_lock);
+	if (list_empty(&conn->response_queue_list)) {
+		spin_unlock_bh(&conn->response_queue_lock);
+		return NULL;
+	}
+
+	list_for_each_entry(qr, &conn->response_queue_list, qr_list)
+		break;
+
+	list_del(&qr->qr_list);
+	if (qr->cmd)
+		atomic_dec(&qr->cmd->response_queue_count);
+	spin_unlock_bh(&conn->response_queue_lock);
+
+	return qr;
+}
+
+static void iscsit_remove_cmd_from_response_queue(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_queue_req *qr, *qr_tmp;
+
+	spin_lock_bh(&conn->response_queue_lock);
+	if (!atomic_read(&cmd->response_queue_count)) {
+		spin_unlock_bh(&conn->response_queue_lock);
+		return;
+	}
+
+	list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list,
+				qr_list) {
+		if (qr->cmd != cmd)
+			continue;
+
+		atomic_dec(&qr->cmd->response_queue_count);
+		list_del(&qr->qr_list);
+		kmem_cache_free(lio_qr_cache, qr);
+	}
+	spin_unlock_bh(&conn->response_queue_lock);
+
+	if (atomic_read(&cmd->response_queue_count)) {
+		pr_err("ITT: 0x%08x response_queue_count: %d\n",
+			cmd->init_task_tag,
+			atomic_read(&cmd->response_queue_count));
+	}
+}
+
+void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn)
+{
+	struct iscsi_queue_req *qr, *qr_tmp;
+
+	spin_lock_bh(&conn->immed_queue_lock);
+	list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+		list_del(&qr->qr_list);
+		if (qr->cmd)
+			atomic_dec(&qr->cmd->immed_queue_count);
+
+		kmem_cache_free(lio_qr_cache, qr);
+	}
+	spin_unlock_bh(&conn->immed_queue_lock);
+
+	spin_lock_bh(&conn->response_queue_lock);
+	list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list,
+			qr_list) {
+		list_del(&qr->qr_list);
+		if (qr->cmd)
+			atomic_dec(&qr->cmd->response_queue_count);
+
+		kmem_cache_free(lio_qr_cache, qr);
+	}
+	spin_unlock_bh(&conn->response_queue_lock);
+}
+
+void iscsit_release_cmd(struct iscsi_cmd *cmd)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	int i;
+
+	iscsit_free_r2ts_from_list(cmd);
+	iscsit_free_all_datain_reqs(cmd);
+
+	kfree(cmd->buf_ptr);
+	kfree(cmd->pdu_list);
+	kfree(cmd->seq_list);
+	kfree(cmd->tmr_req);
+	kfree(cmd->iov_data);
+
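+	/* Free the pages backing the command's internally allocated SGL. */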
+	for (i = 0; i < cmd->t_mem_sg_nents; i++)
+		__free_page(sg_page(&cmd->t_mem_sg[i]));
+
+	kfree(cmd->t_mem_sg);
+
+	if (conn) {
+		iscsit_remove_cmd_from_immediate_queue(cmd, conn);
+		iscsit_remove_cmd_from_response_queue(cmd, conn);
+	}
+
+	kmem_cache_free(lio_cmd_cache, cmd);
+}
+
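+/*
+ * Returns 0 when no session references remain, 1 after sleeping until the
+ * usage count drops to zero, or 2 when waiting is impossible because the
+ * caller is in interrupt context.
+ */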
+int iscsit_check_session_usage_count(struct iscsi_session *sess)
+{
+	spin_lock_bh(&sess->session_usage_lock);
+	if (sess->session_usage_count != 0) {
+		sess->session_waiting_on_uc = 1;
+		spin_unlock_bh(&sess->session_usage_lock);
+		if (in_interrupt())
+			return 2;
+
+		wait_for_completion(&sess->session_waiting_on_uc_comp);
+		return 1;
+	}
+	spin_unlock_bh(&sess->session_usage_lock);
+
+	return 0;
+}
+
+void iscsit_dec_session_usage_count(struct iscsi_session *sess)
+{
+	spin_lock_bh(&sess->session_usage_lock);
+	sess->session_usage_count--;
+
+	if (!sess->session_usage_count && sess->session_waiting_on_uc)
+		complete(&sess->session_waiting_on_uc_comp);
+
+	spin_unlock_bh(&sess->session_usage_lock);
+}
+
+void iscsit_inc_session_usage_count(struct iscsi_session *sess)
+{
+	spin_lock_bh(&sess->session_usage_lock);
+	sess->session_usage_count++;
+	spin_unlock_bh(&sess->session_usage_lock);
+}
+
+/*
+ *	Used before iscsit_do_[rx,tx]_data() to determine iov and [rx,tx]_marker
+ *	array counts needed for sync and steering.
+ */
+static int iscsit_determine_sync_and_steering_counts(
+	struct iscsi_conn *conn,
+	struct iscsi_data_count *count)
+{
+	u32 length = count->data_length;
+	u32 marker, markint;
+
+	count->sync_and_steering = 1;
+
+	marker = (count->type == ISCSI_RX_DATA) ?
+			conn->of_marker : conn->if_marker;
+	markint = (count->type == ISCSI_RX_DATA) ?
+			(conn->conn_ops->OFMarkInt * 4) :
+			(conn->conn_ops->IFMarkInt * 4);
+	count->ss_iov_count = count->iov_count;
+
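+	/*
+	 * Each full marker interval splits one iovec and interleaves an
+	 * 8-byte marker, so reserve three extra iovec slots and two 32-bit
+	 * marker-value slots per interval.
+	 */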
+	while (length > 0) {
+		if (length >= marker) {
+			count->ss_iov_count += 3;
+			count->ss_marker_count += 2;
+
+			length -= marker;
+			marker = markint;
+		} else
+			length = 0;
+	}
+
+	return 0;
+}
+
+/*
+ *	Setup conn->if_marker and conn->of_marker values based upon
+ *	the initial marker-less interval. (see iSCSI v19 A.2)
+ */
+int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn)
+{
+	int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0;
+	/*
+	 * IFMarkInt and OFMarkInt are negotiated as 32-bit words.
+	 */
+	u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4);
+	u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4);
+
+	if (conn->conn_ops->OFMarker) {
+		/*
+		 * Account for the first Login Command received not
+		 * via iscsi_recv_msg().
+		 */
+		conn->of_marker += ISCSI_HDR_LEN;
+		if (conn->of_marker <= OFMarkInt) {
+			conn->of_marker = (OFMarkInt - conn->of_marker);
+		} else {
+			login_ofmarker_count = (conn->of_marker / OFMarkInt);
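+			/*
+			 * Skip ahead to the next marker boundary: one full
+			 * interval per marker already implied by the bytes
+			 * received, plus MARKER_SIZE bytes for each of
+			 * those markers.
+			 */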
+			next_marker = (OFMarkInt * (login_ofmarker_count + 1)) +
+					(login_ofmarker_count * MARKER_SIZE);
+			conn->of_marker = (next_marker - conn->of_marker);
+		}
+		conn->of_marker_offset = 0;
+		pr_debug("Setting OFMarker value to %u based on Initial"
+			" Markerless Interval.\n", conn->of_marker);
+	}
+
+	if (conn->conn_ops->IFMarker) {
+		if (conn->if_marker <= IFMarkInt) {
+			conn->if_marker = (IFMarkInt - conn->if_marker);
+		} else {
+			login_ifmarker_count = (conn->if_marker / IFMarkInt);
+			next_marker = (IFMarkInt * (login_ifmarker_count + 1)) +
+					(login_ifmarker_count * MARKER_SIZE);
+			conn->if_marker = (next_marker - conn->if_marker);
+		}
+		pr_debug("Setting IFMarker value to %u based on Initial"
+			" Markerless Interval.\n", conn->if_marker);
+	}
+
+	return 0;
+}
+
+struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid)
+{
+	struct iscsi_conn *conn;
+
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+		if ((conn->cid == cid) &&
+		    (conn->conn_state == TARG_CONN_STATE_LOGGED_IN)) {
+			iscsit_inc_conn_usage_count(conn);
+			spin_unlock_bh(&sess->conn_lock);
+			return conn;
+		}
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	return NULL;
+}
+
+struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *sess, u16 cid)
+{
+	struct iscsi_conn *conn;
+
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+		if (conn->cid == cid) {
+			iscsit_inc_conn_usage_count(conn);
+			spin_lock(&conn->state_lock);
+			atomic_set(&conn->connection_wait_rcfr, 1);
+			spin_unlock(&conn->state_lock);
+			spin_unlock_bh(&sess->conn_lock);
+			return conn;
+		}
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	return NULL;
+}
+
+void iscsit_check_conn_usage_count(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->conn_usage_lock);
+	if (conn->conn_usage_count != 0) {
+		conn->conn_waiting_on_uc = 1;
+		spin_unlock_bh(&conn->conn_usage_lock);
+
+		wait_for_completion(&conn->conn_waiting_on_uc_comp);
+		return;
+	}
+	spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+void iscsit_dec_conn_usage_count(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->conn_usage_lock);
+	conn->conn_usage_count--;
+
+	if (!conn->conn_usage_count && conn->conn_waiting_on_uc)
+		complete(&conn->conn_waiting_on_uc_comp);
+
+	spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+void iscsit_inc_conn_usage_count(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->conn_usage_lock);
+	conn->conn_usage_count++;
+	spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+static int iscsit_add_nopin(struct iscsi_conn *conn, int want_response)
+{
+	u8 state;
+	struct iscsi_cmd *cmd;
+
+	cmd = iscsit_allocate_cmd(conn, GFP_ATOMIC);
+	if (!cmd)
+		return -1;
+
+	cmd->iscsi_opcode = ISCSI_OP_NOOP_IN;
+	state = (want_response) ? ISTATE_SEND_NOPIN_WANT_RESPONSE :
+				ISTATE_SEND_NOPIN_NO_RESPONSE;
+	cmd->init_task_tag = 0xFFFFFFFF;
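+	/*
+	 * 0xFFFFFFFF is the reserved TTT meaning no response is expected,
+	 * so skip over it when handing out a tag for a NOPIN that wants one.
+	 */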
+	spin_lock_bh(&conn->sess->ttt_lock);
+	cmd->targ_xfer_tag = (want_response) ? conn->sess->targ_xfer_tag++ :
+			0xFFFFFFFF;
+	if (want_response && (cmd->targ_xfer_tag == 0xFFFFFFFF))
+		cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+	spin_unlock_bh(&conn->sess->ttt_lock);
+
+	spin_lock_bh(&conn->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+	spin_unlock_bh(&conn->cmd_lock);
+
+	if (want_response)
+		iscsit_start_nopin_response_timer(conn);
+	iscsit_add_cmd_to_immediate_queue(cmd, conn, state);
+
+	return 0;
+}
+
+static void iscsit_handle_nopin_response_timeout(unsigned long data)
+{
+	struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+	iscsit_inc_conn_usage_count(conn);
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (conn->nopin_response_timer_flags & ISCSI_TF_STOP) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		iscsit_dec_conn_usage_count(conn);
+		return;
+	}
+
+	pr_debug("Did not receive response to NOPIN on CID: %hu on"
+		" SID: %u, failing connection.\n", conn->cid,
+			conn->sess->sid);
+	conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+	spin_unlock_bh(&conn->nopin_timer_lock);
+
+	{
+	struct iscsi_portal_group *tpg = conn->sess->tpg;
+	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+	if (tiqn) {
+		spin_lock_bh(&tiqn->sess_err_stats.lock);
+		strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+				(void *)conn->sess->sess_ops->InitiatorName);
+		tiqn->sess_err_stats.last_sess_failure_type =
+				ISCSI_SESS_ERR_CXN_TIMEOUT;
+		tiqn->sess_err_stats.cxn_timeout_errors++;
+		conn->sess->conn_timeout_errors++;
+		spin_unlock_bh(&tiqn->sess_err_stats.lock);
+	}
+	}
+
+	iscsit_cause_connection_reinstatement(conn, 0);
+	iscsit_dec_conn_usage_count(conn);
+}
+
+void iscsit_mod_nopin_response_timer(struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		return;
+	}
+
+	mod_timer(&conn->nopin_response_timer,
+		(get_jiffies_64() + na->nopin_response_timeout * HZ));
+	spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ *	Called with conn->nopin_timer_lock held.
+ */
+void iscsit_start_nopin_response_timer(struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (conn->nopin_response_timer_flags & ISCSI_TF_RUNNING) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		return;
+	}
+
+	init_timer(&conn->nopin_response_timer);
+	conn->nopin_response_timer.expires =
+		(get_jiffies_64() + na->nopin_response_timeout * HZ);
+	conn->nopin_response_timer.data = (unsigned long)conn;
+	conn->nopin_response_timer.function = iscsit_handle_nopin_response_timeout;
+	conn->nopin_response_timer_flags &= ~ISCSI_TF_STOP;
+	conn->nopin_response_timer_flags |= ISCSI_TF_RUNNING;
+	add_timer(&conn->nopin_response_timer);
+
+	pr_debug("Started NOPIN Response Timer on CID: %d to %u"
+		" seconds\n", conn->cid, na->nopin_response_timeout);
+	spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+void iscsit_stop_nopin_response_timer(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		return;
+	}
+	conn->nopin_response_timer_flags |= ISCSI_TF_STOP;
+	spin_unlock_bh(&conn->nopin_timer_lock);
+
+	del_timer_sync(&conn->nopin_response_timer);
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+	spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+static void iscsit_handle_nopin_timeout(unsigned long data)
+{
+	struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+	iscsit_inc_conn_usage_count(conn);
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (conn->nopin_timer_flags & ISCSI_TF_STOP) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		iscsit_dec_conn_usage_count(conn);
+		return;
+	}
+	conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+	spin_unlock_bh(&conn->nopin_timer_lock);
+
+	iscsit_add_nopin(conn, 1);
+	iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ * Called with conn->nopin_timer_lock held.
+ */
+void __iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+	/*
+	 * NOPIN timeout is disabled.
+	 */
+	if (!na->nopin_timeout)
+		return;
+
+	if (conn->nopin_timer_flags & ISCSI_TF_RUNNING)
+		return;
+
+	init_timer(&conn->nopin_timer);
+	conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ);
+	conn->nopin_timer.data = (unsigned long)conn;
+	conn->nopin_timer.function = iscsit_handle_nopin_timeout;
+	conn->nopin_timer_flags &= ~ISCSI_TF_STOP;
+	conn->nopin_timer_flags |= ISCSI_TF_RUNNING;
+	add_timer(&conn->nopin_timer);
+
+	pr_debug("Started NOPIN Timer on CID: %d at %u second"
+		" interval\n", conn->cid, na->nopin_timeout);
+}
+
+void iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+	/*
+	 * NOPIN timeout is disabled.
+	 */
+	if (!na->nopin_timeout)
+		return;
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (conn->nopin_timer_flags & ISCSI_TF_RUNNING) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		return;
+	}
+
+	init_timer(&conn->nopin_timer);
+	conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ);
+	conn->nopin_timer.data = (unsigned long)conn;
+	conn->nopin_timer.function = iscsit_handle_nopin_timeout;
+	conn->nopin_timer_flags &= ~ISCSI_TF_STOP;
+	conn->nopin_timer_flags |= ISCSI_TF_RUNNING;
+	add_timer(&conn->nopin_timer);
+
+	pr_debug("Started NOPIN Timer on CID: %d at %u second"
+			" interval\n", conn->cid, na->nopin_timeout);
+	spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+void iscsit_stop_nopin_timer(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->nopin_timer_lock);
+	if (!(conn->nopin_timer_flags & ISCSI_TF_RUNNING)) {
+		spin_unlock_bh(&conn->nopin_timer_lock);
+		return;
+	}
+	conn->nopin_timer_flags |= ISCSI_TF_STOP;
+	spin_unlock_bh(&conn->nopin_timer_lock);
+
+	del_timer_sync(&conn->nopin_timer);
+
+	spin_lock_bh(&conn->nopin_timer_lock);
+	conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+	spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+int iscsit_send_tx_data(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	int use_misc)
+{
+	int tx_sent, tx_size;
+	u32 iov_count;
+	struct kvec *iov;
+
+send_data:
+	tx_size = cmd->tx_size;
+
+	if (!use_misc) {
+		iov = &cmd->iov_data[0];
+		iov_count = cmd->iov_data_count;
+	} else {
+		iov = &cmd->iov_misc[0];
+		iov_count = cmd->iov_misc_count;
+	}
+
+	tx_sent = tx_data(conn, &iov[0], iov_count, tx_size);
+	if (tx_size != tx_sent) {
+		if (tx_sent == -EAGAIN) {
+			pr_err("tx_data() returned -EAGAIN\n");
+			goto send_data;
+		} else
+			return -1;
+	}
+	cmd->tx_size = 0;
+
+	return 0;
+}
+
+int iscsit_fe_sendpage_sg(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct scatterlist *sg = cmd->first_data_sg;
+	struct kvec iov;
+	u32 tx_hdr_size, data_len;
+	u32 offset = cmd->first_data_sg_off;
+	int tx_sent;
+
+send_hdr:
+	tx_hdr_size = ISCSI_HDR_LEN;
+	if (conn->conn_ops->HeaderDigest)
+		tx_hdr_size += ISCSI_CRC_LEN;
+
+	iov.iov_base = cmd->pdu;
+	iov.iov_len = tx_hdr_size;
+
+	tx_sent = tx_data(conn, &iov, 1, tx_hdr_size);
+	if (tx_hdr_size != tx_sent) {
+		if (tx_sent == -EAGAIN) {
+			pr_err("tx_data() returned -EAGAIN\n");
+			goto send_hdr;
+		}
+		return -1;
+	}
+
+	data_len = cmd->tx_size - tx_hdr_size - cmd->padding;
+	if (conn->conn_ops->DataDigest)
+		data_len -= ISCSI_CRC_LEN;
+
+	/*
+	 * Perform sendpage() for each page in the scatterlist
+	 */
+	while (data_len) {
+		u32 space = (sg->length - offset);
+		u32 sub_len = min_t(u32, data_len, space);
+send_pg:
+		tx_sent = conn->sock->ops->sendpage(conn->sock,
+					sg_page(sg), sg->offset + offset, sub_len, 0);
+		if (tx_sent != sub_len) {
+			if (tx_sent == -EAGAIN) {
+				pr_err("tcp_sendpage() returned"
+						" -EAGAIN\n");
+				goto send_pg;
+			}
+
+			pr_err("tcp_sendpage() failure: %d\n",
+					tx_sent);
+			return -1;
+		}
+
+		data_len -= sub_len;
+		offset = 0;
+		sg = sg_next(sg);
+	}
+
+send_padding:
+	if (cmd->padding) {
+		struct kvec *iov_p =
+			&cmd->iov_data[cmd->iov_data_count-1];
+
+		tx_sent = tx_data(conn, iov_p, 1, cmd->padding);
+		if (cmd->padding != tx_sent) {
+			if (tx_sent == -EAGAIN) {
+				pr_err("tx_data() returned -EAGAIN\n");
+				goto send_padding;
+			}
+			return -1;
+		}
+	}
+
+send_datacrc:
+	if (conn->conn_ops->DataDigest) {
+		struct kvec *iov_d =
+			&cmd->iov_data[cmd->iov_data_count];
+
+		tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN);
+		if (ISCSI_CRC_LEN != tx_sent) {
+			if (tx_sent == -EAGAIN) {
+				pr_err("tx_data() returned -EAGAIN\n");
+				goto send_datacrc;
+			}
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ *      This function is mainly used for sending an ISCSI_TARG_LOGIN_RSP PDU
+ *      back to the Initiator when an exception condition occurs, with the
+ *      errors set in status_class and status_detail.
+ *
+ *      Parameters:     iSCSI Connection, Status Class, Status Detail.
+ *      Returns:        0 on success, -1 on error.
+ */
+int iscsit_tx_login_rsp(struct iscsi_conn *conn, u8 status_class, u8 status_detail)
+{
+	u8 iscsi_hdr[ISCSI_HDR_LEN];
+	int err;
+	struct kvec iov;
+	struct iscsi_login_rsp *hdr;
+
+	iscsit_collect_login_stats(conn, status_class, status_detail);
+
+	memset(&iov, 0, sizeof(struct kvec));
+	memset(&iscsi_hdr, 0x0, ISCSI_HDR_LEN);
+
+	hdr	= (struct iscsi_login_rsp *)&iscsi_hdr;
+	hdr->opcode		= ISCSI_OP_LOGIN_RSP;
+	hdr->status_class	= status_class;
+	hdr->status_detail	= status_detail;
+	hdr->itt		= cpu_to_be32(conn->login_itt);
+
+	iov.iov_base		= &iscsi_hdr;
+	iov.iov_len		= ISCSI_HDR_LEN;
+
+	PRINT_BUFF(iscsi_hdr, ISCSI_HDR_LEN);
+
+	err = tx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+	if (err != ISCSI_HDR_LEN) {
+		pr_err("tx_data returned less than expected\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void iscsit_print_session_params(struct iscsi_session *sess)
+{
+	struct iscsi_conn *conn;
+
+	pr_debug("-----------------------------[Session Params for"
+		" SID: %u]-----------------------------\n", sess->sid);
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
+		iscsi_dump_conn_ops(conn->conn_ops);
+	spin_unlock_bh(&sess->conn_lock);
+
+	iscsi_dump_sess_ops(sess->sess_ops);
+}
+
+static int iscsit_do_rx_data(
+	struct iscsi_conn *conn,
+	struct iscsi_data_count *count)
+{
+	int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
+	u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0;
+	struct kvec iov[count->ss_iov_count], *iov_p;
+	struct msghdr msg;
+
+	if (!conn || !conn->sock || !conn->conn_ops)
+		return -1;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+
+	if (count->sync_and_steering) {
+		int size = 0;
+		u32 i, orig_iov_count = 0;
+		u32 orig_iov_len = 0, orig_iov_loc = 0;
+		u32 iov_count = 0, per_iov_bytes = 0;
+		u32 *rx_marker, old_rx_marker = 0;
+		struct kvec *iov_record;
+
+		memset(&rx_marker_val, 0,
+				count->ss_marker_count * sizeof(u32));
+		memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+		iov_record = count->iov;
+		orig_iov_count = count->iov_count;
+		rx_marker = &conn->of_marker;
+
+		i = 0;
+		size = data;
+		orig_iov_len = iov_record[orig_iov_loc].iov_len;
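+		/*
+		 * Walk the caller's iovecs, splitting them at each marker
+		 * interval and interleaving two 32-bit slots into which the
+		 * incoming marker words are received.
+		 */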
+		while (size > 0) {
+			pr_debug("rx_data: #1 orig_iov_len %u,"
+			" orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+			pr_debug("rx_data: #2 rx_marker %u, size"
+				" %u\n", *rx_marker, size);
+
+			if (orig_iov_len >= *rx_marker) {
+				iov[iov_count].iov_len = *rx_marker;
+				iov[iov_count++].iov_base =
+					(iov_record[orig_iov_loc].iov_base +
+						per_iov_bytes);
+
+				iov[iov_count].iov_len = (MARKER_SIZE / 2);
+				iov[iov_count++].iov_base =
+					&rx_marker_val[rx_marker_iov++];
+				iov[iov_count].iov_len = (MARKER_SIZE / 2);
+				iov[iov_count++].iov_base =
+					&rx_marker_val[rx_marker_iov++];
+				old_rx_marker = *rx_marker;
+
+				/*
+				 * OFMarkInt is in 32-bit words.
+				 */
+				*rx_marker = (conn->conn_ops->OFMarkInt * 4);
+				size -= old_rx_marker;
+				orig_iov_len -= old_rx_marker;
+				per_iov_bytes += old_rx_marker;
+
+				pr_debug("rx_data: #3 new_rx_marker"
+					" %u, size %u\n", *rx_marker, size);
+			} else {
+				iov[iov_count].iov_len = orig_iov_len;
+				iov[iov_count++].iov_base =
+					(iov_record[orig_iov_loc].iov_base +
+						per_iov_bytes);
+
+				per_iov_bytes = 0;
+				*rx_marker -= orig_iov_len;
+				size -= orig_iov_len;
+
+				if (size)
+					orig_iov_len =
+					iov_record[++orig_iov_loc].iov_len;
+
+				pr_debug("rx_data: #4 new_rx_marker"
+					" %u, size %u\n", *rx_marker, size);
+			}
+		}
+		data += (rx_marker_iov * (MARKER_SIZE / 2));
+
+		iov_p	= &iov[0];
+		iov_len	= iov_count;
+
+		if (iov_count > count->ss_iov_count) {
+			pr_err("iov_count: %d, count->ss_iov_count:"
+				" %d\n", iov_count, count->ss_iov_count);
+			return -1;
+		}
+		if (rx_marker_iov > count->ss_marker_count) {
+			pr_err("rx_marker_iov: %d, count->ss_marker"
+				"_count: %d\n", rx_marker_iov,
+				count->ss_marker_count);
+			return -1;
+		}
+	} else {
+		iov_p = count->iov;
+		iov_len	= count->iov_count;
+	}
+
+	while (total_rx < data) {
+		rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len,
+					(data - total_rx), MSG_WAITALL);
+		if (rx_loop <= 0) {
+			pr_debug("rx_loop: %d total_rx: %d\n",
+				rx_loop, total_rx);
+			return rx_loop;
+		}
+		total_rx += rx_loop;
+		pr_debug("rx_loop: %d, total_rx: %d, data: %d\n",
+				rx_loop, total_rx, data);
+	}
+
+	if (count->sync_and_steering) {
+		int j;
+		for (j = 0; j < rx_marker_iov; j++) {
+			pr_debug("rx_data: #5 j: %d, offset: %d\n",
+				j, rx_marker_val[j]);
+			conn->of_marker_offset = rx_marker_val[j];
+		}
+		total_rx -= (rx_marker_iov * (MARKER_SIZE / 2));
+	}
+
+	return total_rx;
+}
+
+static int iscsit_do_tx_data(
+	struct iscsi_conn *conn,
+	struct iscsi_data_count *count)
+{
+	int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
+	u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0;
+	struct kvec iov[count->ss_iov_count], *iov_p;
+	struct msghdr msg;
+
+	if (!conn || !conn->sock || !conn->conn_ops)
+		return -1;
+
+	if (data <= 0) {
+		pr_err("Data length is: %d\n", data);
+		return -1;
+	}
+
+	memset(&msg, 0, sizeof(struct msghdr));
+
+	if (count->sync_and_steering) {
+		int size = 0;
+		u32 i, orig_iov_count = 0;
+		u32 orig_iov_len = 0, orig_iov_loc = 0;
+		u32 iov_count = 0, per_iov_bytes = 0;
+		u32 *tx_marker, old_tx_marker = 0;
+		struct kvec *iov_record;
+
+		memset(&tx_marker_val, 0,
+			count->ss_marker_count * sizeof(u32));
+		memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+		iov_record = count->iov;
+		orig_iov_count = count->iov_count;
+		tx_marker = &conn->if_marker;
+
+		i = 0;
+		size = data;
+		orig_iov_len = iov_record[orig_iov_loc].iov_len;
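+		/*
+		 * Same walk as the receive path, except that the marker
+		 * words are generated locally instead of being read from
+		 * the wire.
+		 */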
+		while (size > 0) {
+			pr_debug("tx_data: #1 orig_iov_len %u,"
+			" orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+			pr_debug("tx_data: #2 tx_marker %u, size"
+				" %u\n", *tx_marker, size);
+
+			if (orig_iov_len >= *tx_marker) {
+				iov[iov_count].iov_len = *tx_marker;
+				iov[iov_count++].iov_base =
+					(iov_record[orig_iov_loc].iov_base +
+						per_iov_bytes);
+
+				tx_marker_val[tx_marker_iov] =
+						(size - *tx_marker);
+				iov[iov_count].iov_len = (MARKER_SIZE / 2);
+				iov[iov_count++].iov_base =
+					&tx_marker_val[tx_marker_iov++];
+				iov[iov_count].iov_len = (MARKER_SIZE / 2);
+				iov[iov_count++].iov_base =
+					&tx_marker_val[tx_marker_iov++];
+				old_tx_marker = *tx_marker;
+
+				/*
+				 * IFMarkInt is in 32-bit words.
+				 */
+				*tx_marker = (conn->conn_ops->IFMarkInt * 4);
+				size -= old_tx_marker;
+				orig_iov_len -= old_tx_marker;
+				per_iov_bytes += old_tx_marker;
+
+				pr_debug("tx_data: #3 new_tx_marker"
+					" %u, size %u\n", *tx_marker, size);
+				pr_debug("tx_data: #4 offset %u\n",
+					tx_marker_val[tx_marker_iov-1]);
+			} else {
+				iov[iov_count].iov_len = orig_iov_len;
+				iov[iov_count++].iov_base
+					= (iov_record[orig_iov_loc].iov_base +
+						per_iov_bytes);
+
+				per_iov_bytes = 0;
+				*tx_marker -= orig_iov_len;
+				size -= orig_iov_len;
+
+				if (size)
+					orig_iov_len =
+					iov_record[++orig_iov_loc].iov_len;
+
+				pr_debug("tx_data: #5 new_tx_marker"
+					" %u, size %u\n", *tx_marker, size);
+			}
+		}
+
+		data += (tx_marker_iov * (MARKER_SIZE / 2));
+
+		iov_p = &iov[0];
+		iov_len = iov_count;
+
+		if (iov_count > count->ss_iov_count) {
+			pr_err("iov_count: %d, count->ss_iov_count:"
+				" %d\n", iov_count, count->ss_iov_count);
+			return -1;
+		}
+		if (tx_marker_iov > count->ss_marker_count) {
+			pr_err("tx_marker_iov: %d, count->ss_marker"
+				"_count: %d\n", tx_marker_iov,
+				count->ss_marker_count);
+			return -1;
+		}
+	} else {
+		iov_p = count->iov;
+		iov_len = count->iov_count;
+	}
+
+	while (total_tx < data) {
+		tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
+					(data - total_tx));
+		if (tx_loop <= 0) {
+			pr_debug("tx_loop: %d total_tx %d\n",
+				tx_loop, total_tx);
+			return tx_loop;
+		}
+		total_tx += tx_loop;
+		pr_debug("tx_loop: %d, total_tx: %d, data: %d\n",
+					tx_loop, total_tx, data);
+	}
+
+	if (count->sync_and_steering)
+		total_tx -= (tx_marker_iov * (MARKER_SIZE / 2));
+
+	return total_tx;
+}
+
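+/*
+ * rx_data()/tx_data() wrap the caller's iovec in an iscsi_data_count and,
+ * once markers have been negotiated and the connection is logged in, add
+ * the sync-and-steering accounting before performing the socket I/O.
+ */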
+int rx_data(
+	struct iscsi_conn *conn,
+	struct kvec *iov,
+	int iov_count,
+	int data)
+{
+	struct iscsi_data_count c;
+
+	if (!conn || !conn->sock || !conn->conn_ops)
+		return -1;
+
+	memset(&c, 0, sizeof(struct iscsi_data_count));
+	c.iov = iov;
+	c.iov_count = iov_count;
+	c.data_length = data;
+	c.type = ISCSI_RX_DATA;
+
+	if (conn->conn_ops->OFMarker &&
+	   (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
+		if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
+			return -1;
+	}
+
+	return iscsit_do_rx_data(conn, &c);
+}
+
+int tx_data(
+	struct iscsi_conn *conn,
+	struct kvec *iov,
+	int iov_count,
+	int data)
+{
+	struct iscsi_data_count c;
+
+	if (!conn || !conn->sock || !conn->conn_ops)
+		return -1;
+
+	memset(&c, 0, sizeof(struct iscsi_data_count));
+	c.iov = iov;
+	c.iov_count = iov_count;
+	c.data_length = data;
+	c.type = ISCSI_TX_DATA;
+
+	if (conn->conn_ops->IFMarker &&
+	   (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
+		if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
+			return -1;
+	}
+
+	return iscsit_do_tx_data(conn, &c);
+}
+
+void iscsit_collect_login_stats(
+	struct iscsi_conn *conn,
+	u8 status_class,
+	u8 status_detail)
+{
+	struct iscsi_param *intrname = NULL;
+	struct iscsi_tiqn *tiqn;
+	struct iscsi_login_stats *ls;
+
+	tiqn = iscsit_snmp_get_tiqn(conn);
+	if (!tiqn)
+		return;
+
+	ls = &tiqn->login_stats;
+
+	spin_lock(&ls->lock);
+	if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) &&
+	    ((get_jiffies_64() - ls->last_fail_time) < 10)) {
+		/* We already have the failure info for this login */
+		spin_unlock(&ls->lock);
+		return;
+	}
+
+	if (status_class == ISCSI_STATUS_CLS_SUCCESS)
+		ls->accepts++;
+	else if (status_class == ISCSI_STATUS_CLS_REDIRECT) {
+		ls->redirects++;
+		ls->last_fail_type = ISCSI_LOGIN_FAIL_REDIRECT;
+	} else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR)  &&
+		 (status_detail == ISCSI_LOGIN_STATUS_AUTH_FAILED)) {
+		ls->authenticate_fails++;
+		ls->last_fail_type =  ISCSI_LOGIN_FAIL_AUTHENTICATE;
+	} else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR)  &&
+		 (status_detail == ISCSI_LOGIN_STATUS_TGT_FORBIDDEN)) {
+		ls->authorize_fails++;
+		ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHORIZE;
+	} else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) &&
+		 (status_detail == ISCSI_LOGIN_STATUS_INIT_ERR)) {
+		ls->negotiate_fails++;
+		ls->last_fail_type = ISCSI_LOGIN_FAIL_NEGOTIATE;
+	} else {
+		ls->other_fails++;
+		ls->last_fail_type = ISCSI_LOGIN_FAIL_OTHER;
+	}
+
+	/* Save initiator name, ip address and time, if it is a failed login */
+	if (status_class != ISCSI_STATUS_CLS_SUCCESS) {
+		if (conn->param_list)
+			intrname = iscsi_find_param_from_key(INITIATORNAME,
+							     conn->param_list);
+		strcpy(ls->last_intr_fail_name,
+		       (intrname ? intrname->value : "Unknown"));
+
+		ls->last_intr_fail_ip_family = conn->sock->sk->sk_family;
+		snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE,
+				"%s", conn->login_ip);
+		ls->last_fail_time = get_jiffies_64();
+	}
+
+	spin_unlock(&ls->lock);
+}
+
+struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn)
+{
+	struct iscsi_portal_group *tpg;
+
+	if (!conn || !conn->sess)
+		return NULL;
+
+	tpg = conn->sess->tpg;
+	if (!tpg)
+		return NULL;
+
+	if (!tpg->tpg_tiqn)
+		return NULL;
+
+	return tpg->tpg_tiqn;
+}
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
new file mode 100644
index 0000000..2cd49d6
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -0,0 +1,60 @@
+#ifndef ISCSI_TARGET_UTIL_H
+#define ISCSI_TARGET_UTIL_H
+
+#define MARKER_SIZE	8
+
+extern int iscsit_add_r2t_to_list(struct iscsi_cmd *, u32, u32, int, u32);
+extern struct iscsi_r2t *iscsit_get_r2t_for_eos(struct iscsi_cmd *, u32, u32);
+extern struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *);
+extern void iscsit_free_r2t(struct iscsi_r2t *, struct iscsi_cmd *);
+extern void iscsit_free_r2ts_from_list(struct iscsi_cmd *);
+extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t);
+extern struct iscsi_cmd *iscsit_allocate_se_cmd(struct iscsi_conn *, u32, int, int);
+extern struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(struct iscsi_conn *, u8);
+extern int iscsit_decide_list_to_build(struct iscsi_cmd *, u32);
+extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32);
+extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *);
+extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32);
+int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, u32 cmdsn);
+extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *);
+extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, u32);
+extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *,
+			u32, u32);
+extern struct iscsi_cmd *iscsit_find_cmd_from_ttt(struct iscsi_conn *, u32);
+extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd **,
+			struct iscsi_conn_recovery **, u32);
+extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *);
+extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *);
+extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *);
+extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *);
+extern void iscsit_release_cmd(struct iscsi_cmd *);
+extern int iscsit_check_session_usage_count(struct iscsi_session *);
+extern void iscsit_dec_session_usage_count(struct iscsi_session *);
+extern void iscsit_inc_session_usage_count(struct iscsi_session *);
+extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *);
+extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16);
+extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16);
+extern void iscsit_check_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_dec_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_inc_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_mod_nopin_response_timer(struct iscsi_conn *);
+extern void iscsit_start_nopin_response_timer(struct iscsi_conn *);
+extern void iscsit_stop_nopin_response_timer(struct iscsi_conn *);
+extern void __iscsit_start_nopin_timer(struct iscsi_conn *);
+extern void iscsit_start_nopin_timer(struct iscsi_conn *);
+extern void iscsit_stop_nopin_timer(struct iscsi_conn *);
+extern int iscsit_send_tx_data(struct iscsi_cmd *, struct iscsi_conn *, int);
+extern int iscsit_fe_sendpage_sg(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_tx_login_rsp(struct iscsi_conn *, u8, u8);
+extern void iscsit_print_session_params(struct iscsi_session *);
+extern int iscsit_print_dev_to_proc(char *, char **, off_t, int);
+extern int iscsit_print_sessions_to_proc(char *, char **, off_t, int);
+extern int iscsit_print_tpg_to_proc(char *, char **, off_t, int);
+extern int rx_data(struct iscsi_conn *, struct kvec *, int, int);
+extern int tx_data(struct iscsi_conn *, struct kvec *, int, int);
+extern void iscsit_collect_login_stats(struct iscsi_conn *, u8, u8);
+extern struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_UTIL_H ***/
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 46352d6..c75a01a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -4052,17 +4052,16 @@
 	struct se_task *task;
 	struct se_device *dev = cmd->se_dev;
 	unsigned long flags;
-	sector_t sectors;
 	int task_count, i, ret;
-	sector_t dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors;
+	sector_t sectors, dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors;
 	u32 sector_size = dev->se_sub_dev->se_dev_attrib.block_size;
 	struct scatterlist *sg;
 	struct scatterlist *cmd_sg;
 
 	WARN_ON(cmd->data_length % sector_size);
 	sectors = DIV_ROUND_UP(cmd->data_length, sector_size);
-	task_count = DIV_ROUND_UP(sectors, dev_max_sectors);
-
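+	/*
+	 * sectors is a sector_t and may be 64-bit, so use the sector_t-aware
+	 * round-up helper instead of plain DIV_ROUND_UP.
+	 */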
+	task_count = DIV_ROUND_UP_SECTOR_T(sectors, dev_max_sectors);
+
 	cmd_sg = sgl;
 	for (i = 0; i < task_count; i++) {
 		unsigned int task_size;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 21d816e..f441726 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -28,6 +28,17 @@
 
 if WATCHDOG
 
+config WATCHDOG_CORE
+	bool "WatchDog Timer Driver Core"
+	---help---
+	  Say Y here if you want to use the new watchdog timer driver core.
+	  This driver provides a framework for all watchdog timer drivers
+	  and gives them the /dev/watchdog interface (and later also the
+	  sysfs interface).
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called watchdog.
+
 config WATCHDOG_NOWAYOUT
 	bool "Disable watchdog shutdown on close"
 	help
@@ -186,6 +197,15 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called sa1100_wdt.
 
+config DW_WATCHDOG
+	tristate "Synopsys DesignWare watchdog"
+	depends on ARM && HAVE_CLK
+	help
+	  Say Y here to include support for the Synopsys DesignWare
+	  watchdog timer found in many ARM chips.
+	  To compile this driver as a module, choose M here: the
+	  module will be called dw_wdt.
+
 config MPCORE_WATCHDOG
 	tristate "MPcore watchdog"
 	depends on HAVE_ARM_TWD
@@ -321,7 +341,7 @@
 
 config IMX2_WDT
 	tristate "IMX2+ Watchdog"
-	depends on ARCH_MX2 || ARCH_MX25 || ARCH_MX3 || ARCH_MX5
+	depends on IMX_HAVE_PLATFORM_IMX2_WDT
 	help
 	  This is the driver for the hardware watchdog
 	  on the Freescale IMX2 and later processors.
@@ -879,6 +899,20 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called m54xx_wdt.
 
+# MicroBlaze Architecture
+
+config XILINX_WATCHDOG
+	tristate "Xilinx Watchdog timer"
+	depends on MICROBLAZE
+	---help---
+	  Watchdog driver for the xps_timebase_wdt IP core.
+
+	  IMPORTANT: The xps_timebase_wdt parent must have the
+	  "clock-frequency" property in its device tree node.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called of_xilinx_wdt.
+
 # MIPS Architecture
 
 config ATH79_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index ed26f70..55bd574 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -2,6 +2,10 @@
 # Makefile for the WatchDog device drivers.
 #
 
+# The WatchDog Timer Driver Core.
+watchdog-objs	+= watchdog_core.o watchdog_dev.o
+obj-$(CONFIG_WATCHDOG_CORE)	+= watchdog.o
+
 # Only one watchdog can succeed. We probe the ISA/PCI/USB based
 # watchdog-cards first, then the architecture specific watchdog
 # drivers and then the architecture independent "softdog" driver.
@@ -37,6 +41,7 @@
 obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
 obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
 obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
+obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o
 obj-$(CONFIG_MPCORE_WATCHDOG) += mpcore_wdt.o
 obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o
 obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
@@ -109,6 +114,9 @@
 # M68K Architecture
 obj-$(CONFIG_M54xx_WATCHDOG) += m54xx_wdt.o
 
+# MicroBlaze Architecture
+obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o
+
 # MIPS Architecture
 obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o
 obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index eac2602..87445b2 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -31,7 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
 
-#include <mach/at91_wdt.h>
+#include "at91sam9_wdt.h"
 
 #define DRV_NAME "AT91SAM9 Watchdog"
 
@@ -284,27 +284,8 @@
 	return res;
 }
 
-#ifdef CONFIG_PM
-
-static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message)
-{
-	return 0;
-}
-
-static int at91wdt_resume(struct platform_device *pdev)
-{
-	return 0;
-}
-
-#else
-#define at91wdt_suspend	NULL
-#define at91wdt_resume	NULL
-#endif
-
 static struct platform_driver at91wdt_driver = {
 	.remove		= __exit_p(at91wdt_remove),
-	.suspend	= at91wdt_suspend,
-	.resume		= at91wdt_resume,
 	.driver		= {
 		.name	= "at91_wdt",
 		.owner	= THIS_MODULE,
diff --git a/arch/arm/mach-at91/include/mach/at91_wdt.h b/drivers/watchdog/at91sam9_wdt.h
similarity index 96%
rename from arch/arm/mach-at91/include/mach/at91_wdt.h
rename to drivers/watchdog/at91sam9_wdt.h
index fecc2e9..757f9ca 100644
--- a/arch/arm/mach-at91/include/mach/at91_wdt.h
+++ b/drivers/watchdog/at91sam9_wdt.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/mach-at91/include/mach/at91_wdt.h
+ * drivers/watchdog/at91sam9_wdt.h
  *
  * Copyright (C) 2007 Andrew Victor
  * Copyright (C) 2007 Atmel Corporation.
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
new file mode 100644
index 0000000..f10f8c0
--- /dev/null
+++ b/drivers/watchdog/dw_wdt.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2010-2011 Picochip Ltd., Jamie Iles
+ * http://www.picochip.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file implements a driver for the Synopsys DesignWare watchdog device
+ * found in many ARM subsystems. The watchdog has 16 different timeout periods
+ * and these are a function of the input clock frequency.
+ *
+ * The DesignWare watchdog cannot be stopped once it has been started so we
+ * use a software timer to implement a ping that will keep the watchdog alive.
+ * If we receive an expected close for the watchdog then we keep the timer
+ * running, otherwise the timer is stopped and the watchdog will expire.
+ */
+#define pr_fmt(fmt) "dw_wdt: " fmt
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/uaccess.h>
+#include <linux/watchdog.h>
+
+#define WDOG_CONTROL_REG_OFFSET		    0x00
+#define WDOG_CONTROL_REG_WDT_EN_MASK	    0x01
+#define WDOG_TIMEOUT_RANGE_REG_OFFSET	    0x04
+#define WDOG_CURRENT_COUNT_REG_OFFSET	    0x08
+#define WDOG_COUNTER_RESTART_REG_OFFSET     0x0c
+#define WDOG_COUNTER_RESTART_KICK_VALUE	    0x76
+
+/* The maximum TOP (timeout period) value that can be set in the watchdog. */
+#define DW_WDT_MAX_TOP		15
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
+		 "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
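+/* Period of the software timer used to keep the hardware watchdog kicked. */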
+#define WDT_TIMEOUT		(HZ / 2)
+
+static struct {
+	spinlock_t		lock;
+	void __iomem		*regs;
+	struct clk		*clk;
+	unsigned long		in_use;
+	unsigned long		next_heartbeat;
+	struct timer_list	timer;
+	int			expect_close;
+} dw_wdt;
+
+static inline int dw_wdt_is_enabled(void)
+{
+	return readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET) &
+		WDOG_CONTROL_REG_WDT_EN_MASK;
+}
+
+static inline int dw_wdt_top_in_seconds(unsigned top)
+{
+	/*
+	 * There are 16 possible timeout values in 0..15 where the number of
+	 * cycles is 2 ^ (16 + i) and the watchdog counts down.
+	 */
+	return (1 << (16 + top)) / clk_get_rate(dw_wdt.clk);
+}
+
+static int dw_wdt_get_top(void)
+{
+	int top = readl(dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF;
+
+	return dw_wdt_top_in_seconds(top);
+}
+
+static inline void dw_wdt_set_next_heartbeat(void)
+{
+	dw_wdt.next_heartbeat = jiffies + dw_wdt_get_top() * HZ;
+}
+
+static int dw_wdt_set_top(unsigned top_s)
+{
+	int i, top_val = DW_WDT_MAX_TOP;
+
+	/*
+	 * Iterate over the timeout values until we find the closest match. We
+	 * always look for >=.
+	 */
+	for (i = 0; i <= DW_WDT_MAX_TOP; ++i)
+		if (dw_wdt_top_in_seconds(i) >= top_s) {
+			top_val = i;
+			break;
+		}
+
+	/* Set the new value in the watchdog. */
+	writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+
+	dw_wdt_set_next_heartbeat();
+
+	return dw_wdt_top_in_seconds(top_val);
+}
+
+static void dw_wdt_keepalive(void)
+{
+	writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs +
+	       WDOG_COUNTER_RESTART_REG_OFFSET);
+}
+
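+/*
+ * Software timer callback: keep kicking the hardware while the userspace
+ * heartbeat is current (or while nothing has the device open), otherwise
+ * let the hardware watchdog expire and reset the machine.
+ */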
+static void dw_wdt_ping(unsigned long data)
+{
+	if (time_before(jiffies, dw_wdt.next_heartbeat) ||
+	    (!nowayout && !dw_wdt.in_use)) {
+		dw_wdt_keepalive();
+		mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+	} else
+		pr_crit("keepalive missed, machine will reset\n");
+}
+
+static int dw_wdt_open(struct inode *inode, struct file *filp)
+{
+	if (test_and_set_bit(0, &dw_wdt.in_use))
+		return -EBUSY;
+
+	/* Make sure we don't get unloaded. */
+	__module_get(THIS_MODULE);
+
+	spin_lock(&dw_wdt.lock);
+	if (!dw_wdt_is_enabled()) {
+		/*
+		 * The watchdog is not currently enabled. Set the timeout to
+		 * the maximum and then start it.
+		 */
+		dw_wdt_set_top(DW_WDT_MAX_TOP);
+		writel(WDOG_CONTROL_REG_WDT_EN_MASK,
+		       dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+	}
+
+	dw_wdt_set_next_heartbeat();
+
+	spin_unlock(&dw_wdt.lock);
+
+	return nonseekable_open(inode, filp);
+}
+
+static ssize_t dw_wdt_write(struct file *filp, const char __user *buf,
+			    size_t len, loff_t *offset)
+{
+	if (!len)
+		return 0;
+
+	if (!nowayout) {
+		size_t i;
+
+		dw_wdt.expect_close = 0;
+
+		for (i = 0; i < len; ++i) {
+			char c;
+
+			if (get_user(c, buf + i))
+				return -EFAULT;
+
+			if (c == 'V') {
+				dw_wdt.expect_close = 1;
+				break;
+			}
+		}
+	}
+
+	dw_wdt_set_next_heartbeat();
+	mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+
+	return len;
+}
+
+static u32 dw_wdt_time_left(void)
+{
+	return readl(dw_wdt.regs + WDOG_CURRENT_COUNT_REG_OFFSET) /
+		clk_get_rate(dw_wdt.clk);
+}
+
+static const struct watchdog_info dw_wdt_ident = {
+	.options	= WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT |
+			  WDIOF_MAGICCLOSE,
+	.identity	= "Synopsys DesignWare Watchdog",
+};
+
+static long dw_wdt_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	unsigned long val;
+	int timeout;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		return copy_to_user((struct watchdog_info __user *)arg,
+				    &dw_wdt_ident, sizeof(dw_wdt_ident)) ? -EFAULT : 0;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		return put_user(0, (int __user *)arg);
+
+	case WDIOC_KEEPALIVE:
+		dw_wdt_set_next_heartbeat();
+		return 0;
+
+	case WDIOC_SETTIMEOUT:
+		if (get_user(val, (int __user *)arg))
+			return -EFAULT;
+		timeout = dw_wdt_set_top(val);
+		return put_user(timeout, (int __user *)arg);
+
+	case WDIOC_GETTIMEOUT:
+		return put_user(dw_wdt_get_top(), (int __user *)arg);
+
+	case WDIOC_GETTIMELEFT:
+		/* Get the time left until expiry. */
+		if (get_user(val, (int __user *)arg))
+			return -EFAULT;
+		return put_user(dw_wdt_time_left(), (int __user *)arg);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+static int dw_wdt_release(struct inode *inode, struct file *filp)
+{
+	clear_bit(0, &dw_wdt.in_use);
+
+	if (!dw_wdt.expect_close) {
+		del_timer(&dw_wdt.timer);
+
+		if (!nowayout)
+			pr_crit("unexpected close, system will reboot soon\n");
+		else
+			pr_crit("watchdog cannot be disabled, system will reboot soon\n");
+	}
+
+	dw_wdt.expect_close = 0;
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int dw_wdt_suspend(struct device *dev)
+{
+	clk_disable(dw_wdt.clk);
+
+	return 0;
+}
+
+static int dw_wdt_resume(struct device *dev)
+{
+	int err = clk_enable(dw_wdt.clk);
+
+	if (err)
+		return err;
+
+	dw_wdt_keepalive();
+
+	return 0;
+}
+
+static const struct dev_pm_ops dw_wdt_pm_ops = {
+	.suspend	= dw_wdt_suspend,
+	.resume		= dw_wdt_resume,
+};
+#endif /* CONFIG_PM */
+
+static const struct file_operations wdt_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.open		= dw_wdt_open,
+	.write		= dw_wdt_write,
+	.unlocked_ioctl	= dw_wdt_ioctl,
+	.release	= dw_wdt_release
+};
+
+static struct miscdevice dw_wdt_miscdev = {
+	.fops		= &wdt_fops,
+	.name		= "watchdog",
+	.minor		= WATCHDOG_MINOR,
+};
+
+static int __devinit dw_wdt_drv_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!mem)
+		return -EINVAL;
+
+	if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
+				     "dw_wdt"))
+		return -ENOMEM;
+
+	dw_wdt.regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+	if (!dw_wdt.regs)
+		return -ENOMEM;
+
+	dw_wdt.clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dw_wdt.clk))
+		return PTR_ERR(dw_wdt.clk);
+
+	ret = clk_enable(dw_wdt.clk);
+	if (ret)
+		goto out_put_clk;
+
+	spin_lock_init(&dw_wdt.lock);
+
+	ret = misc_register(&dw_wdt_miscdev);
+	if (ret)
+		goto out_disable_clk;
+
+	dw_wdt_set_next_heartbeat();
+	setup_timer(&dw_wdt.timer, dw_wdt_ping, 0);
+	mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+
+	return 0;
+
+out_disable_clk:
+	clk_disable(dw_wdt.clk);
+out_put_clk:
+	clk_put(dw_wdt.clk);
+
+	return ret;
+}
+
+static int __devexit dw_wdt_drv_remove(struct platform_device *pdev)
+{
+	misc_deregister(&dw_wdt_miscdev);
+
+	clk_disable(dw_wdt.clk);
+	clk_put(dw_wdt.clk);
+
+	return 0;
+}
+
+static struct platform_driver dw_wdt_driver = {
+	.probe		= dw_wdt_drv_probe,
+	.remove		= __devexit_p(dw_wdt_drv_remove),
+	.driver		= {
+		.name	= "dw_wdt",
+		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &dw_wdt_pm_ops,
+#endif /* CONFIG_PM */
+	},
+};
+
+static int __init dw_wdt_watchdog_init(void)
+{
+	return platform_driver_register(&dw_wdt_driver);
+}
+module_init(dw_wdt_watchdog_init);
+
+static void __exit dw_wdt_watchdog_exit(void)
+{
+	platform_driver_unregister(&dw_wdt_driver);
+}
+module_exit(dw_wdt_watchdog_exit);
+
+MODULE_AUTHOR("Jamie Iles");
+MODULE_DESCRIPTION("Synopsys DesignWare Watchdog Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 8cb2685..410fba4 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -36,7 +36,7 @@
 #include <asm/cacheflush.h>
 #endif /* CONFIG_HPWDT_NMI_DECODING */
 
-#define HPWDT_VERSION			"1.2.0"
+#define HPWDT_VERSION			"1.3.0"
 #define SECS_TO_TICKS(secs)		((secs) * 1000 / 128)
 #define TICKS_TO_SECS(ticks)		((ticks) * 128 / 1000)
 #define HPWDT_MAX_TIMER			TICKS_TO_SECS(65535)
@@ -87,6 +87,19 @@
 };
 #define SMBIOS_CRU64_INFORMATION	212
 
+/* type 219 */
+struct smbios_proliant_info {
+	u8 type;
+	u8 byte_length;
+	u16 handle;
+	u32 power_features;
+	u32 omega_features;
+	u32 reserved;
+	u32 misc_features;
+};
+#define SMBIOS_ICRU_INFORMATION		219
+
 struct cmn_registers {
 	union {
 		struct {
@@ -132,6 +145,7 @@
 static unsigned int hpwdt_nmi_decoding;
 static unsigned int allow_kdump;
 static unsigned int priority;		/* hpwdt at end of die_notify list */
+static unsigned int is_icru;
 static DEFINE_SPINLOCK(rom_lock);
 static void *cru_rom_addr;
 static struct cmn_registers cmn_regs;
@@ -476,19 +490,22 @@
 		goto out;
 
 	spin_lock_irqsave(&rom_lock, rom_pl);
-	if (!die_nmi_called)
+	if (!die_nmi_called && !is_icru)
 		asminline_call(&cmn_regs, cru_rom_addr);
 	die_nmi_called = 1;
 	spin_unlock_irqrestore(&rom_lock, rom_pl);
-	if (cmn_regs.u1.ral == 0) {
-		printk(KERN_WARNING "hpwdt: An NMI occurred, "
-			"but unable to determine source.\n");
-	} else {
-		if (allow_kdump)
-			hpwdt_stop();
-		panic("An NMI occurred, please see the Integrated "
-			"Management Log for details.\n");
+	if (!is_icru) {
+		if (cmn_regs.u1.ral == 0) {
+			printk(KERN_WARNING "hpwdt: An NMI occurred, "
+				"but unable to determine source.\n");
+		}
 	}
+
+	if (allow_kdump)
+		hpwdt_stop();
+	panic("An NMI occurred, please see the Integrated "
+		"Management Log for details.\n");
+
 out:
 	return NOTIFY_OK;
 }
@@ -659,30 +676,63 @@
 }
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+/*
+ *	dmi_find_icru
+ *
+ *	Routine Description:
+ *	This function checks whether or not we are on an iCRU-based server.
+ *	This check is independent of architecture and needs to be made for
+ *	any ProLiant system.
+ */
+static void __devinit dmi_find_icru(const struct dmi_header *dm, void *dummy)
+{
+	struct smbios_proliant_info *smbios_proliant_ptr;
+
+	if (dm->type == SMBIOS_ICRU_INFORMATION) {
+		smbios_proliant_ptr = (struct smbios_proliant_info *) dm;
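+		/* Bit 0 of misc_features indicates iCRU support. */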
+		if (smbios_proliant_ptr->misc_features & 0x01)
+			is_icru = 1;
+	}
+}
+
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
 	int retval;
 
 	/*
-	 * We need to map the ROM to get the CRU service.
-	 * For 32 bit Operating Systems we need to go through the 32 Bit
-	 * BIOS Service Directory
-	 * For 64 bit Operating Systems we get that service through SMBIOS.
+	 * On typical CRU-based systems we need to map that service in
+	 * the BIOS. For 32 bit Operating Systems we need to go through
+	 * the 32 Bit BIOS Service Directory. For 64 bit Operating
+	 * Systems we get that service through SMBIOS.
+	 *
+	 * On systems that support the new iCRU service all we need to
+	 * do is call dmi_walk to get the supported flag value and skip
+	 * the old cru detect code.
 	 */
-	retval = detect_cru_service();
-	if (retval < 0) {
-		dev_warn(&dev->dev,
-			"Unable to detect the %d Bit CRU Service.\n",
-			HPWDT_ARCH);
-		return retval;
-	}
+	dmi_walk(dmi_find_icru, NULL);
+	if (!is_icru) {
 
-	/*
-	 * We know this is the only CRU call we need to make so lets keep as
-	 * few instructions as possible once the NMI comes in.
-	 */
-	cmn_regs.u1.rah = 0x0D;
-	cmn_regs.u1.ral = 0x02;
+		/*
+		 * We need to map the ROM to get the CRU service.
+		 * For 32 bit Operating Systems we need to go through the 32 Bit
+		 * BIOS Service Directory.
+		 * For 64 bit Operating Systems we get that service through SMBIOS.
+		 */
+		retval = detect_cru_service();
+		if (retval < 0) {
+			dev_warn(&dev->dev,
+				"Unable to detect the %d Bit CRU Service.\n",
+				HPWDT_ARCH);
+			return retval;
+		}
+
+		/*
+		 * We know this is the only CRU call we need to make, so let's
+		 * keep as few instructions as possible once the NMI comes in.
+		 */
+		cmn_regs.u1.rah = 0x0D;
+		cmn_regs.u1.ral = 0x02;
+	}
 
 	/*
 	 * If the priority is set to 1, then we will be put first on the
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 5fd020d..751a591 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -120,72 +120,12 @@
 	TCO_3420,	/* 3420 */
 	TCO_3450,	/* 3450 */
 	TCO_EP80579,	/* EP80579 */
-	TCO_CPT1,	/* Cougar Point */
-	TCO_CPT2,	/* Cougar Point Desktop */
-	TCO_CPT3,	/* Cougar Point Mobile */
-	TCO_CPT4,	/* Cougar Point */
-	TCO_CPT5,	/* Cougar Point */
-	TCO_CPT6,	/* Cougar Point */
-	TCO_CPT7,	/* Cougar Point */
-	TCO_CPT8,	/* Cougar Point */
-	TCO_CPT9,	/* Cougar Point */
-	TCO_CPT10,	/* Cougar Point */
-	TCO_CPT11,	/* Cougar Point */
-	TCO_CPT12,	/* Cougar Point */
-	TCO_CPT13,	/* Cougar Point */
-	TCO_CPT14,	/* Cougar Point */
-	TCO_CPT15,	/* Cougar Point */
-	TCO_CPT16,	/* Cougar Point */
-	TCO_CPT17,	/* Cougar Point */
-	TCO_CPT18,	/* Cougar Point */
-	TCO_CPT19,	/* Cougar Point */
-	TCO_CPT20,	/* Cougar Point */
-	TCO_CPT21,	/* Cougar Point */
-	TCO_CPT22,	/* Cougar Point */
-	TCO_CPT23,	/* Cougar Point */
-	TCO_CPT24,	/* Cougar Point */
-	TCO_CPT25,	/* Cougar Point */
-	TCO_CPT26,	/* Cougar Point */
-	TCO_CPT27,	/* Cougar Point */
-	TCO_CPT28,	/* Cougar Point */
-	TCO_CPT29,	/* Cougar Point */
-	TCO_CPT30,	/* Cougar Point */
-	TCO_CPT31,	/* Cougar Point */
-	TCO_PBG1,	/* Patsburg */
-	TCO_PBG2,	/* Patsburg */
+	TCO_CPT,	/* Cougar Point */
+	TCO_CPTD,	/* Cougar Point Desktop */
+	TCO_CPTM,	/* Cougar Point Mobile */
+	TCO_PBG,	/* Patsburg */
 	TCO_DH89XXCC,	/* DH89xxCC */
-	TCO_PPT0,	/* Panther Point */
-	TCO_PPT1,	/* Panther Point */
-	TCO_PPT2,	/* Panther Point */
-	TCO_PPT3,	/* Panther Point */
-	TCO_PPT4,	/* Panther Point */
-	TCO_PPT5,	/* Panther Point */
-	TCO_PPT6,	/* Panther Point */
-	TCO_PPT7,	/* Panther Point */
-	TCO_PPT8,	/* Panther Point */
-	TCO_PPT9,	/* Panther Point */
-	TCO_PPT10,	/* Panther Point */
-	TCO_PPT11,	/* Panther Point */
-	TCO_PPT12,	/* Panther Point */
-	TCO_PPT13,	/* Panther Point */
-	TCO_PPT14,	/* Panther Point */
-	TCO_PPT15,	/* Panther Point */
-	TCO_PPT16,	/* Panther Point */
-	TCO_PPT17,	/* Panther Point */
-	TCO_PPT18,	/* Panther Point */
-	TCO_PPT19,	/* Panther Point */
-	TCO_PPT20,	/* Panther Point */
-	TCO_PPT21,	/* Panther Point */
-	TCO_PPT22,	/* Panther Point */
-	TCO_PPT23,	/* Panther Point */
-	TCO_PPT24,	/* Panther Point */
-	TCO_PPT25,	/* Panther Point */
-	TCO_PPT26,	/* Panther Point */
-	TCO_PPT27,	/* Panther Point */
-	TCO_PPT28,	/* Panther Point */
-	TCO_PPT29,	/* Panther Point */
-	TCO_PPT30,	/* Panther Point */
-	TCO_PPT31,	/* Panther Point */
+	TCO_PPT,	/* Panther Point */
 };
 
 static struct {
@@ -244,83 +184,14 @@
 	{"3450", 2},
 	{"EP80579", 2},
 	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point", 2},
-	{"Patsburg", 2},
+	{"Cougar Point Desktop", 2},
+	{"Cougar Point Mobile", 2},
 	{"Patsburg", 2},
 	{"DH89xxCC", 2},
 	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
-	{"Panther Point", 2},
 	{NULL, 0}
 };
 
-#define ITCO_PCI_DEVICE(dev, data) \
-	.vendor = PCI_VENDOR_ID_INTEL,	\
-	.device = dev,			\
-	.subvendor = PCI_ANY_ID,	\
-	.subdevice = PCI_ANY_ID,	\
-	.class = 0,			\
-	.class_mask = 0,		\
-	.driver_data = data
-
 /*
  * This data only exists for exporting the supported PCI ids
  * via MODULE_DEVICE_TABLE.  We do not actually register a
@@ -328,138 +199,138 @@
  * functions that probably will be registered by other drivers.
  */
 static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = {
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AA_0,	TCO_ICH)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AB_0,	TCO_ICH0)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_0,	TCO_ICH2)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_10,	TCO_ICH2M)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_0,	TCO_ICH3)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_12,	TCO_ICH3M)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_0,	TCO_ICH4)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_12,	TCO_ICH4M)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801E_0,		TCO_CICH)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801EB_0,	TCO_ICH5)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB_1,		TCO_6300ESB)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_0,		TCO_ICH6)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_1,		TCO_ICH6M)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_2,		TCO_ICH6W)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB2_0,		TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2671,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2672,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2673,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2674,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2675,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2676,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2677,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2678,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x2679,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x267a,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x267b,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x267c,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x267d,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x267e,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(0x267f,				TCO_631XESB)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_0,		TCO_ICH7)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_30,		TCO_ICH7DH)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_1,		TCO_ICH7M)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_31,		TCO_ICH7MDH)},
-	{ ITCO_PCI_DEVICE(0x27bc,				TCO_NM10)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_0,		TCO_ICH8)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_2,		TCO_ICH8DH)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_3,		TCO_ICH8DO)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_4,		TCO_ICH8M)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_1,		TCO_ICH8ME)},
-	{ ITCO_PCI_DEVICE(0x2918,				TCO_ICH9)},
-	{ ITCO_PCI_DEVICE(0x2916,				TCO_ICH9R)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_2,		TCO_ICH9DH)},
-	{ ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_4,		TCO_ICH9DO)},
-	{ ITCO_PCI_DEVICE(0x2919,				TCO_ICH9M)},
-	{ ITCO_PCI_DEVICE(0x2917,				TCO_ICH9ME)},
-	{ ITCO_PCI_DEVICE(0x3a18,				TCO_ICH10)},
-	{ ITCO_PCI_DEVICE(0x3a16,				TCO_ICH10R)},
-	{ ITCO_PCI_DEVICE(0x3a1a,				TCO_ICH10D)},
-	{ ITCO_PCI_DEVICE(0x3a14,				TCO_ICH10DO)},
-	{ ITCO_PCI_DEVICE(0x3b00,				TCO_PCH)},
-	{ ITCO_PCI_DEVICE(0x3b01,				TCO_PCHM)},
-	{ ITCO_PCI_DEVICE(0x3b02,				TCO_P55)},
-	{ ITCO_PCI_DEVICE(0x3b03,				TCO_PM55)},
-	{ ITCO_PCI_DEVICE(0x3b06,				TCO_H55)},
-	{ ITCO_PCI_DEVICE(0x3b07,				TCO_QM57)},
-	{ ITCO_PCI_DEVICE(0x3b08,				TCO_H57)},
-	{ ITCO_PCI_DEVICE(0x3b09,				TCO_HM55)},
-	{ ITCO_PCI_DEVICE(0x3b0a,				TCO_Q57)},
-	{ ITCO_PCI_DEVICE(0x3b0b,				TCO_HM57)},
-	{ ITCO_PCI_DEVICE(0x3b0d,				TCO_PCHMSFF)},
-	{ ITCO_PCI_DEVICE(0x3b0f,				TCO_QS57)},
-	{ ITCO_PCI_DEVICE(0x3b12,				TCO_3400)},
-	{ ITCO_PCI_DEVICE(0x3b14,				TCO_3420)},
-	{ ITCO_PCI_DEVICE(0x3b16,				TCO_3450)},
-	{ ITCO_PCI_DEVICE(0x5031,				TCO_EP80579)},
-	{ ITCO_PCI_DEVICE(0x1c41,				TCO_CPT1)},
-	{ ITCO_PCI_DEVICE(0x1c42,				TCO_CPT2)},
-	{ ITCO_PCI_DEVICE(0x1c43,				TCO_CPT3)},
-	{ ITCO_PCI_DEVICE(0x1c44,				TCO_CPT4)},
-	{ ITCO_PCI_DEVICE(0x1c45,				TCO_CPT5)},
-	{ ITCO_PCI_DEVICE(0x1c46,				TCO_CPT6)},
-	{ ITCO_PCI_DEVICE(0x1c47,				TCO_CPT7)},
-	{ ITCO_PCI_DEVICE(0x1c48,				TCO_CPT8)},
-	{ ITCO_PCI_DEVICE(0x1c49,				TCO_CPT9)},
-	{ ITCO_PCI_DEVICE(0x1c4a,				TCO_CPT10)},
-	{ ITCO_PCI_DEVICE(0x1c4b,				TCO_CPT11)},
-	{ ITCO_PCI_DEVICE(0x1c4c,				TCO_CPT12)},
-	{ ITCO_PCI_DEVICE(0x1c4d,				TCO_CPT13)},
-	{ ITCO_PCI_DEVICE(0x1c4e,				TCO_CPT14)},
-	{ ITCO_PCI_DEVICE(0x1c4f,				TCO_CPT15)},
-	{ ITCO_PCI_DEVICE(0x1c50,				TCO_CPT16)},
-	{ ITCO_PCI_DEVICE(0x1c51,				TCO_CPT17)},
-	{ ITCO_PCI_DEVICE(0x1c52,				TCO_CPT18)},
-	{ ITCO_PCI_DEVICE(0x1c53,				TCO_CPT19)},
-	{ ITCO_PCI_DEVICE(0x1c54,				TCO_CPT20)},
-	{ ITCO_PCI_DEVICE(0x1c55,				TCO_CPT21)},
-	{ ITCO_PCI_DEVICE(0x1c56,				TCO_CPT22)},
-	{ ITCO_PCI_DEVICE(0x1c57,				TCO_CPT23)},
-	{ ITCO_PCI_DEVICE(0x1c58,				TCO_CPT24)},
-	{ ITCO_PCI_DEVICE(0x1c59,				TCO_CPT25)},
-	{ ITCO_PCI_DEVICE(0x1c5a,				TCO_CPT26)},
-	{ ITCO_PCI_DEVICE(0x1c5b,				TCO_CPT27)},
-	{ ITCO_PCI_DEVICE(0x1c5c,				TCO_CPT28)},
-	{ ITCO_PCI_DEVICE(0x1c5d,				TCO_CPT29)},
-	{ ITCO_PCI_DEVICE(0x1c5e,				TCO_CPT30)},
-	{ ITCO_PCI_DEVICE(0x1c5f,				TCO_CPT31)},
-	{ ITCO_PCI_DEVICE(0x1d40,				TCO_PBG1)},
-	{ ITCO_PCI_DEVICE(0x1d41,				TCO_PBG2)},
-	{ ITCO_PCI_DEVICE(0x2310,				TCO_DH89XXCC)},
-	{ ITCO_PCI_DEVICE(0x1e40,				TCO_PPT0)},
-	{ ITCO_PCI_DEVICE(0x1e41,				TCO_PPT1)},
-	{ ITCO_PCI_DEVICE(0x1e42,				TCO_PPT2)},
-	{ ITCO_PCI_DEVICE(0x1e43,				TCO_PPT3)},
-	{ ITCO_PCI_DEVICE(0x1e44,				TCO_PPT4)},
-	{ ITCO_PCI_DEVICE(0x1e45,				TCO_PPT5)},
-	{ ITCO_PCI_DEVICE(0x1e46,				TCO_PPT6)},
-	{ ITCO_PCI_DEVICE(0x1e47,				TCO_PPT7)},
-	{ ITCO_PCI_DEVICE(0x1e48,				TCO_PPT8)},
-	{ ITCO_PCI_DEVICE(0x1e49,				TCO_PPT9)},
-	{ ITCO_PCI_DEVICE(0x1e4a,				TCO_PPT10)},
-	{ ITCO_PCI_DEVICE(0x1e4b,				TCO_PPT11)},
-	{ ITCO_PCI_DEVICE(0x1e4c,				TCO_PPT12)},
-	{ ITCO_PCI_DEVICE(0x1e4d,				TCO_PPT13)},
-	{ ITCO_PCI_DEVICE(0x1e4e,				TCO_PPT14)},
-	{ ITCO_PCI_DEVICE(0x1e4f,				TCO_PPT15)},
-	{ ITCO_PCI_DEVICE(0x1e50,				TCO_PPT16)},
-	{ ITCO_PCI_DEVICE(0x1e51,				TCO_PPT17)},
-	{ ITCO_PCI_DEVICE(0x1e52,				TCO_PPT18)},
-	{ ITCO_PCI_DEVICE(0x1e53,				TCO_PPT19)},
-	{ ITCO_PCI_DEVICE(0x1e54,				TCO_PPT20)},
-	{ ITCO_PCI_DEVICE(0x1e55,				TCO_PPT21)},
-	{ ITCO_PCI_DEVICE(0x1e56,				TCO_PPT22)},
-	{ ITCO_PCI_DEVICE(0x1e57,				TCO_PPT23)},
-	{ ITCO_PCI_DEVICE(0x1e58,				TCO_PPT24)},
-	{ ITCO_PCI_DEVICE(0x1e59,				TCO_PPT25)},
-	{ ITCO_PCI_DEVICE(0x1e5a,				TCO_PPT26)},
-	{ ITCO_PCI_DEVICE(0x1e5b,				TCO_PPT27)},
-	{ ITCO_PCI_DEVICE(0x1e5c,				TCO_PPT28)},
-	{ ITCO_PCI_DEVICE(0x1e5d,				TCO_PPT29)},
-	{ ITCO_PCI_DEVICE(0x1e5e,				TCO_PPT30)},
-	{ ITCO_PCI_DEVICE(0x1e5f,				TCO_PPT31)},
+	{ PCI_VDEVICE(INTEL, 0x2410), TCO_ICH},
+	{ PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0},
+	{ PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2},
+	{ PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M},
+	{ PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3},
+	{ PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M},
+	{ PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4},
+	{ PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M},
+	{ PCI_VDEVICE(INTEL, 0x2450), TCO_CICH},
+	{ PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5},
+	{ PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB},
+	{ PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6},
+	{ PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M},
+	{ PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W},
+	{ PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267a), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7},
+	{ PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH},
+	{ PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M},
+	{ PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH},
+	{ PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10},
+	{ PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8},
+	{ PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH},
+	{ PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO},
+	{ PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M},
+	{ PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME},
+	{ PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9},
+	{ PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R},
+	{ PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH},
+	{ PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO},
+	{ PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M},
+	{ PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME},
+	{ PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10},
+	{ PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R},
+	{ PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D},
+	{ PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO},
+	{ PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH},
+	{ PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM},
+	{ PCI_VDEVICE(INTEL, 0x3b02), TCO_P55},
+	{ PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55},
+	{ PCI_VDEVICE(INTEL, 0x3b06), TCO_H55},
+	{ PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57},
+	{ PCI_VDEVICE(INTEL, 0x3b08), TCO_H57},
+	{ PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55},
+	{ PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57},
+	{ PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57},
+	{ PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF},
+	{ PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57},
+	{ PCI_VDEVICE(INTEL, 0x3b12), TCO_3400},
+	{ PCI_VDEVICE(INTEL, 0x3b14), TCO_3420},
+	{ PCI_VDEVICE(INTEL, 0x3b16), TCO_3450},
+	{ PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579},
+	{ PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD},
+	{ PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM},
+	{ PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG},
+	{ PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG},
+	{ PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC},
+	{ PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e51), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT},
 	{ 0, },			/* End of list */
 };
 MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
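The long run of per-device-ID enum entries collapses because PCI_VDEVICE() already wildcards everything except vendor and device. As defined in <linux/pci.h> of this era:

	#define PCI_VDEVICE(vendor, device)		\
		PCI_VENDOR_ID_##vendor, (device),	\
		PCI_ANY_ID, PCI_ANY_ID, 0, 0

so an entry such as { PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT } is shorthand for { PCI_VENDOR_ID_INTEL, 0x1c41, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TCO_CPT }, which is exactly what the removed driver-private ITCO_PCI_DEVICE() macro spelled out with designated initializers.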
@@ -1052,15 +923,10 @@
 	iTCO_wdt_stop();
 }
 
-#define iTCO_wdt_suspend NULL
-#define iTCO_wdt_resume  NULL
-
 static struct platform_driver iTCO_wdt_driver = {
 	.probe          = iTCO_wdt_probe,
 	.remove         = __devexit_p(iTCO_wdt_remove),
 	.shutdown       = iTCO_wdt_shutdown,
-	.suspend        = iTCO_wdt_suspend,
-	.resume         = iTCO_wdt_resume,
 	.driver         = {
 		.owner  = THIS_MODULE,
 		.name   = DRV_NAME,
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 86f7cac..b8ef2c6 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -329,12 +329,18 @@
 	}
 }
 
+static const struct of_device_id imx2_wdt_dt_ids[] = {
+	{ .compatible = "fsl,imx21-wdt", },
+	{ /* sentinel */ }
+};
+
 static struct platform_driver imx2_wdt_driver = {
 	.remove		= __exit_p(imx2_wdt_remove),
 	.shutdown	= imx2_wdt_shutdown,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.owner	= THIS_MODULE,
+		.of_match_table = imx2_wdt_dt_ids,
 	},
 };
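With of_match_table wired up, the platform bus will also bind this driver to any device-tree node whose compatible list contains "fsl,imx21-wdt" (matching the i.MX watchdog binding documented elsewhere in this merge). As a hedged sketch, a probe routine can tell the two instantiation paths apart like this; the helper name is invented for illustration:

	#include <linux/of_device.h>

	static int imx2_wdt_probed_from_dt(struct platform_device *pdev)
	{
		/* non-NULL only when the device came from the device tree */
		return of_match_device(imx2_wdt_dt_ids, &pdev->dev) != NULL;
	}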
 
diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c
index 6143f52..8d2d850 100644
--- a/drivers/watchdog/it8712f_wdt.c
+++ b/drivers/watchdog/it8712f_wdt.c
@@ -28,10 +28,10 @@
 #include <linux/notifier.h>
 #include <linux/reboot.h>
 #include <linux/fs.h>
-#include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/ioport.h>
 
 #define NAME "it8712f_wdt"
 
@@ -51,7 +51,6 @@
 
 static unsigned long wdt_open;
 static unsigned expect_close;
-static spinlock_t io_lock;
 static unsigned char revision;
 
 /* Dog Food address - We use the game port address */
@@ -121,20 +120,26 @@
 	outb(ldn, VAL);
 }
 
-static inline void superio_enter(void)
+static inline int superio_enter(void)
 {
-	spin_lock(&io_lock);
+	/*
+	 * Try to reserve REG and REG + 1 for exclusive access.
+	 */
+	if (!request_muxed_region(REG, 2, NAME))
+		return -EBUSY;
+
 	outb(0x87, REG);
 	outb(0x01, REG);
 	outb(0x55, REG);
 	outb(0x55, REG);
+	return 0;
 }
 
 static inline void superio_exit(void)
 {
 	outb(0x02, REG);
 	outb(0x02, VAL);
-	spin_unlock(&io_lock);
+	release_region(REG, 2);
 }
 
 static inline void it8712f_wdt_ping(void)
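Since superio_enter() can now fail with -EBUSY, every caller has to check its return value and must balance a successful enter with exactly one superio_exit(), which is what releases the muxed region. The pattern the rest of this patch applies, as a sketch:

	static int example_superio_op(void)
	{
		int ret = superio_enter();	/* may now fail with -EBUSY */
		if (ret)
			return ret;

		superio_select(LDN_GPIO);
		/* ... program the chip ... */

		superio_exit();			/* releases REG/REG + 1 */
		return 0;
	}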
@@ -173,10 +178,13 @@
 		return 0;
 }
 
-static void it8712f_wdt_enable(void)
+static int it8712f_wdt_enable(void)
 {
+	int ret = superio_enter();
+	if (ret)
+		return ret;
+
 	printk(KERN_DEBUG NAME ": enabling watchdog timer\n");
-	superio_enter();
 	superio_select(LDN_GPIO);
 
 	superio_outb(wdt_control_reg, WDT_CONTROL);
@@ -186,13 +194,17 @@
 	superio_exit();
 
 	it8712f_wdt_ping();
+
+	return 0;
 }
 
-static void it8712f_wdt_disable(void)
+static int it8712f_wdt_disable(void)
 {
-	printk(KERN_DEBUG NAME ": disabling watchdog timer\n");
+	int ret = superio_enter();
+	if (ret)
+		return ret;
 
-	superio_enter();
+	printk(KERN_DEBUG NAME ": disabling watchdog timer\n");
 	superio_select(LDN_GPIO);
 
 	superio_outb(0, WDT_CONFIG);
@@ -202,6 +214,7 @@
 	superio_outb(0, WDT_TIMEOUT);
 
 	superio_exit();
+	return 0;
 }
 
 static int it8712f_wdt_notify(struct notifier_block *this,
@@ -252,6 +265,7 @@
 						WDIOF_MAGICCLOSE,
 	};
 	int value;
+	int ret;
 
 	switch (cmd) {
 	case WDIOC_GETSUPPORT:
@@ -259,7 +273,9 @@
 			return -EFAULT;
 		return 0;
 	case WDIOC_GETSTATUS:
-		superio_enter();
+		ret = superio_enter();
+		if (ret)
+			return ret;
 		superio_select(LDN_GPIO);
 
 		value = it8712f_wdt_get_status();
@@ -280,7 +296,9 @@
 		if (value > (max_units * 60))
 			return -EINVAL;
 		margin = value;
-		superio_enter();
+		ret = superio_enter();
+		if (ret)
+			return ret;
 		superio_select(LDN_GPIO);
 
 		it8712f_wdt_update_margin();
@@ -299,10 +317,14 @@
 
 static int it8712f_wdt_open(struct inode *inode, struct file *file)
 {
+	int ret;
 	/* only allow one at a time */
 	if (test_and_set_bit(0, &wdt_open))
 		return -EBUSY;
-	it8712f_wdt_enable();
+
+	ret = it8712f_wdt_enable();
+	if (ret)
+		return ret;
 	return nonseekable_open(inode, file);
 }
 
@@ -313,7 +335,8 @@
 			": watchdog device closed unexpectedly, will not"
 			" disable the watchdog timer\n");
 	} else if (!nowayout) {
-		it8712f_wdt_disable();
+		if (it8712f_wdt_disable())
+			printk(KERN_WARNING NAME ": watchdog disable failed\n");
 	}
 	expect_close = 0;
 	clear_bit(0, &wdt_open);
@@ -340,8 +363,10 @@
 {
 	int err = -ENODEV;
 	int chip_type;
+	int ret = superio_enter();
+	if (ret)
+		return ret;
 
-	superio_enter();
 	chip_type = superio_inw(DEVID);
 	if (chip_type != IT8712F_DEVID)
 		goto exit;
@@ -382,8 +407,6 @@
 {
 	int err = 0;
 
-	spin_lock_init(&io_lock);
-
 	if (it8712f_wdt_find(&address))
 		return -ENODEV;
 
@@ -392,7 +415,11 @@
 		return -EBUSY;
 	}
 
-	it8712f_wdt_disable();
+	err = it8712f_wdt_disable();
+	if (err) {
+		printk(KERN_ERR NAME ": unable to disable watchdog timer.\n");
+		goto out;
+	}
 
 	err = register_reboot_notifier(&it8712f_wdt_notifier);
 	if (err) {
diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index b1bc72f..a2d9a12 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -137,7 +137,6 @@
 
 static	unsigned int base, gpact, ciract, max_units, chip_type;
 static	unsigned long wdt_status;
-static	DEFINE_SPINLOCK(spinlock);
 
 static	int nogameport = DEFAULT_NOGAMEPORT;
 static	int exclusive  = DEFAULT_EXCLUSIVE;
@@ -163,18 +162,26 @@
 
 /* Superio Chip */
 
-static inline void superio_enter(void)
+static inline int superio_enter(void)
 {
+	/*
+	 * Try to reserve REG and REG + 1 for exclusive access.
+	 */
+	if (!request_muxed_region(REG, 2, WATCHDOG_NAME))
+		return -EBUSY;
+
 	outb(0x87, REG);
 	outb(0x01, REG);
 	outb(0x55, REG);
 	outb(0x55, REG);
+	return 0;
 }
 
 static inline void superio_exit(void)
 {
 	outb(0x02, REG);
 	outb(0x02, VAL);
+	release_region(REG, 2);
 }
 
 static inline void superio_select(int ldn)
@@ -255,12 +262,11 @@
 	set_bit(WDTS_KEEPALIVE, &wdt_status);
 }
 
-static void wdt_start(void)
+static int wdt_start(void)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&spinlock, flags);
-	superio_enter();
+	int ret = superio_enter();
+	if (ret)
+		return ret;
 
 	superio_select(GPIO);
 	if (test_bit(WDTS_USE_GP, &wdt_status))
@@ -270,15 +276,15 @@
 	wdt_update_timeout();
 
 	superio_exit();
-	spin_unlock_irqrestore(&spinlock, flags);
+
+	return 0;
 }
 
-static void wdt_stop(void)
+static int wdt_stop(void)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&spinlock, flags);
-	superio_enter();
+	int ret = superio_enter();
+	if (ret)
+		return ret;
 
 	superio_select(GPIO);
 	superio_outb(0x00, WDTCTRL);
@@ -288,7 +294,7 @@
 		superio_outb(0x00, WDTVALMSB);
 
 	superio_exit();
-	spin_unlock_irqrestore(&spinlock, flags);
+	return 0;
 }
 
 /**
@@ -303,8 +309,6 @@
 
 static int wdt_set_timeout(int t)
 {
-	unsigned long flags;
-
 	if (t < 1 || t > max_units * 60)
 		return -EINVAL;
 
@@ -313,14 +317,15 @@
 	else
 		timeout = t;
 
-	spin_lock_irqsave(&spinlock, flags);
 	if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		superio_enter();
+		int ret = superio_enter();
+		if (ret)
+			return ret;
+
 		superio_select(GPIO);
 		wdt_update_timeout();
 		superio_exit();
 	}
-	spin_unlock_irqrestore(&spinlock, flags);
 	return 0;
 }
 
@@ -339,12 +344,12 @@
 
 static int wdt_get_status(int *status)
 {
-	unsigned long flags;
-
 	*status = 0;
 	if (testmode) {
-		spin_lock_irqsave(&spinlock, flags);
-		superio_enter();
+		int ret = superio_enter();
+		if (ret)
+			return ret;
+
 		superio_select(GPIO);
 		if (superio_inb(WDTCTRL) & WDT_ZERO) {
 			superio_outb(0x00, WDTCTRL);
@@ -353,7 +358,6 @@
 		}
 
 		superio_exit();
-		spin_unlock_irqrestore(&spinlock, flags);
 	}
 	if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status))
 		*status |= WDIOF_KEEPALIVEPING;
@@ -379,9 +383,17 @@
 	if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status))
 		return -EBUSY;
 	if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
+		int ret;
 		if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status))
 			__module_get(THIS_MODULE);
-		wdt_start();
+
+		ret = wdt_start();
+		if (ret) {
+			clear_bit(WDTS_LOCKED, &wdt_status);
+			clear_bit(WDTS_TIMER_RUN, &wdt_status);
+			clear_bit(WDTS_DEV_OPEN, &wdt_status);
+			return ret;
+		}
 	}
 	return nonseekable_open(inode, file);
 }
@@ -403,7 +415,16 @@
 {
 	if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
 		if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) {
-			wdt_stop();
+			int ret = wdt_stop();
+			if (ret) {
+				/*
+				 * Stop failed. Just keep the watchdog alive
+				 * and hope nothing bad happens.
+				 */
+				set_bit(WDTS_EXPECTED, &wdt_status);
+				wdt_keepalive();
+				return ret;
+			}
 			clear_bit(WDTS_TIMER_RUN, &wdt_status);
 		} else {
 			wdt_keepalive();
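The release path above illustrates a fail-safe rule for watchdogs: if the hardware cannot be reached to disable the timer, it is safer to keep feeding it than to leave it armed with nobody petting it. In sketch form:

	/* Sketch of the fallback used in wdt_release() above. */
	if (wdt_stop()) {		/* chip unreachable (e.g. -EBUSY) */
		set_bit(WDTS_EXPECTED, &wdt_status);
		wdt_keepalive();	/* keep feeding; retry on next close */
	} else {
		clear_bit(WDTS_TIMER_RUN, &wdt_status);
	}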
@@ -484,7 +505,9 @@
 				    &ident, sizeof(ident)) ? -EFAULT : 0;
 
 	case WDIOC_GETSTATUS:
-		wdt_get_status(&status);
+		rc = wdt_get_status(&status);
+		if (rc)
+			return rc;
 		return put_user(status, uarg.i);
 
 	case WDIOC_GETBOOTSTATUS:
@@ -500,14 +523,22 @@
 
 		switch (new_options) {
 		case WDIOS_DISABLECARD:
-			if (test_bit(WDTS_TIMER_RUN, &wdt_status))
-				wdt_stop();
+			if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
+				rc = wdt_stop();
+				if (rc)
+					return rc;
+			}
 			clear_bit(WDTS_TIMER_RUN, &wdt_status);
 			return 0;
 
 		case WDIOS_ENABLECARD:
-			if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status))
-				wdt_start();
+			if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
+				rc = wdt_start();
+				if (rc) {
+					clear_bit(WDTS_TIMER_RUN, &wdt_status);
+					return rc;
+				}
+			}
 			return 0;
 
 		default:
@@ -560,16 +591,17 @@
 	int rc = 0;
 	int try_gameport = !nogameport;
 	u8  chip_rev;
-	unsigned long flags;
+	int gp_rreq_fail = 0;
 
 	wdt_status = 0;
 
-	spin_lock_irqsave(&spinlock, flags);
-	superio_enter();
+	rc = superio_enter();
+	if (rc)
+		return rc;
+
 	chip_type = superio_inw(CHIPID);
 	chip_rev  = superio_inb(CHIPREV) & 0x0f;
 	superio_exit();
-	spin_unlock_irqrestore(&spinlock, flags);
 
 	switch (chip_type) {
 	case IT8702_ID:
@@ -603,8 +635,9 @@
 		return -ENODEV;
 	}
 
-	spin_lock_irqsave(&spinlock, flags);
-	superio_enter();
+	rc = superio_enter();
+	if (rc)
+		return rc;
 
 	superio_select(GPIO);
 	superio_outb(WDT_TOV1, WDTCFG);
@@ -620,21 +653,16 @@
 		}
 		gpact = superio_inb(ACTREG);
 		superio_outb(0x01, ACTREG);
-		superio_exit();
-		spin_unlock_irqrestore(&spinlock, flags);
 		if (request_region(base, 1, WATCHDOG_NAME))
 			set_bit(WDTS_USE_GP, &wdt_status);
 		else
-			rc = -EIO;
-	} else {
-		superio_exit();
-		spin_unlock_irqrestore(&spinlock, flags);
+			gp_rreq_fail = 1;
 	}
 
 	/* If we haven't Gameport support, try to get CIR support */
 	if (!test_bit(WDTS_USE_GP, &wdt_status)) {
 		if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) {
-			if (rc == -EIO)
+			if (gp_rreq_fail)
 				printk(KERN_ERR PFX
 					"I/O Address 0x%04x and 0x%04x"
 					" already in use\n", base, CIR_BASE);
@@ -646,21 +674,16 @@
 			goto err_out;
 		}
 		base = CIR_BASE;
-		spin_lock_irqsave(&spinlock, flags);
-		superio_enter();
 
 		superio_select(CIR);
 		superio_outw(base, BASEREG);
 		superio_outb(0x00, CIR_ILS);
 		ciract = superio_inb(ACTREG);
 		superio_outb(0x01, ACTREG);
-		if (rc == -EIO) {
+		if (gp_rreq_fail) {
 			superio_select(GAMEPORT);
 			superio_outb(gpact, ACTREG);
 		}
-
-		superio_exit();
-		spin_unlock_irqrestore(&spinlock, flags);
 	}
 
 	if (timeout < 1 || timeout > max_units * 60) {
@@ -704,6 +727,7 @@
 		"nogameport=%d)\n", chip_type, chip_rev, timeout,
 		nowayout, testmode, exclusive, nogameport);
 
+	superio_exit();
 	return 0;
 
 err_out_reboot:
@@ -711,49 +735,37 @@
 err_out_region:
 	release_region(base, test_bit(WDTS_USE_GP, &wdt_status) ? 1 : 8);
 	if (!test_bit(WDTS_USE_GP, &wdt_status)) {
-		spin_lock_irqsave(&spinlock, flags);
-		superio_enter();
 		superio_select(CIR);
 		superio_outb(ciract, ACTREG);
-		superio_exit();
-		spin_unlock_irqrestore(&spinlock, flags);
 	}
 err_out:
 	if (try_gameport) {
-		spin_lock_irqsave(&spinlock, flags);
-		superio_enter();
 		superio_select(GAMEPORT);
 		superio_outb(gpact, ACTREG);
-		superio_exit();
-		spin_unlock_irqrestore(&spinlock, flags);
 	}
 
+	superio_exit();
 	return rc;
 }
 
 static void __exit it87_wdt_exit(void)
 {
-	unsigned long flags;
-	int nolock;
-
-	nolock = !spin_trylock_irqsave(&spinlock, flags);
-	superio_enter();
-	superio_select(GPIO);
-	superio_outb(0x00, WDTCTRL);
-	superio_outb(0x00, WDTCFG);
-	superio_outb(0x00, WDTVALLSB);
-	if (max_units > 255)
-		superio_outb(0x00, WDTVALMSB);
-	if (test_bit(WDTS_USE_GP, &wdt_status)) {
-		superio_select(GAMEPORT);
-		superio_outb(gpact, ACTREG);
-	} else {
-		superio_select(CIR);
-		superio_outb(ciract, ACTREG);
+	if (superio_enter() == 0) {
+		superio_select(GPIO);
+		superio_outb(0x00, WDTCTRL);
+		superio_outb(0x00, WDTCFG);
+		superio_outb(0x00, WDTVALLSB);
+		if (max_units > 255)
+			superio_outb(0x00, WDTVALMSB);
+		if (test_bit(WDTS_USE_GP, &wdt_status)) {
+			superio_select(GAMEPORT);
+			superio_outb(gpact, ACTREG);
+		} else {
+			superio_select(CIR);
+			superio_outb(ciract, ACTREG);
+		}
+		superio_exit();
 	}
-	superio_exit();
-	if (!nolock)
-		spin_unlock_irqrestore(&spinlock, flags);
 
 	misc_deregister(&wdt_miscdev);
 	unregister_reboot_notifier(&wdt_notifier);
diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c
index 2b4af22..4dc3102 100644
--- a/drivers/watchdog/mpcore_wdt.c
+++ b/drivers/watchdog/mpcore_wdt.c
@@ -407,12 +407,35 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int mpcore_wdt_suspend(struct platform_device *dev, pm_message_t msg)
+{
+	struct mpcore_wdt *wdt = platform_get_drvdata(dev);
+	mpcore_wdt_stop(wdt);		/* Turn the WDT off */
+	return 0;
+}
+
+static int mpcore_wdt_resume(struct platform_device *dev)
+{
+	struct mpcore_wdt *wdt = platform_get_drvdata(dev);
+	/* re-activate timer */
+	if (test_bit(0, &wdt->timer_alive))
+		mpcore_wdt_start(wdt);
+	return 0;
+}
+#else
+#define mpcore_wdt_suspend	NULL
+#define mpcore_wdt_resume	NULL
+#endif
+
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:mpcore_wdt");
 
 static struct platform_driver mpcore_wdt_driver = {
 	.probe		= mpcore_wdt_probe,
 	.remove		= __devexit_p(mpcore_wdt_remove),
+	.suspend	= mpcore_wdt_suspend,
+	.resume		= mpcore_wdt_resume,
 	.shutdown	= mpcore_wdt_shutdown,
 	.driver		= {
 		.owner	= THIS_MODULE,
diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c
index 0430e09..ac37bb8 100644
--- a/drivers/watchdog/mtx-1_wdt.c
+++ b/drivers/watchdog/mtx-1_wdt.c
@@ -225,11 +225,11 @@
 
 	ret = misc_register(&mtx1_wdt_misc);
 	if (ret < 0) {
-		printk(KERN_ERR " mtx-1_wdt : failed to register\n");
+		dev_err(&pdev->dev, "failed to register misc device\n");
 		return ret;
 	}
 	mtx1_wdt_start();
-	printk(KERN_INFO "MTX-1 Watchdog driver\n");
+	dev_info(&pdev->dev, "MTX-1 Watchdog driver\n");
 	return 0;
 }
 
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
new file mode 100644
index 0000000..4ec741a
--- /dev/null
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -0,0 +1,433 @@
+/*
+*   of_xilinx_wdt.c  1.01  A Watchdog Device Driver for Xilinx xps_timebase_wdt
+*
+*   (C) Copyright 2011 (Alejandro Cabrera <aldaya@gmail.com>)
+*
+*       -----------------------
+*
+*   This program is free software; you can redistribute it and/or
+*   modify it under the terms of the GNU General Public License
+*   as published by the Free Software Foundation; either version
+*   2 of the License, or (at your option) any later version.
+*
+*       -----------------------
+*	30-May-2011 Alejandro Cabrera <aldaya@gmail.com>
+*		- If "xlnx,wdt-enable-once" wasn't found on device tree the
+*		  module will use CONFIG_WATCHDOG_NOWAYOUT
+*		- If the device tree parameters ("clock-frequency" and
+*		  "xlnx,wdt-interval") wasn't found the driver won't
+*		  know the wdt reset interval
+*/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/watchdog.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+
+/* Register offsets for the Wdt device */
+#define XWT_TWCSR0_OFFSET   0x0 /* Control/Status Register0 */
+#define XWT_TWCSR1_OFFSET   0x4 /* Control/Status Register1 */
+#define XWT_TBR_OFFSET      0x8 /* Timebase Register Offset */
+
+/* Control/Status Register Masks  */
+#define XWT_CSR0_WRS_MASK   0x00000008 /* Reset status */
+#define XWT_CSR0_WDS_MASK   0x00000004 /* Timer state  */
+#define XWT_CSR0_EWDT1_MASK 0x00000002 /* Enable bit 1 */
+
+/* Control/Status Register 0/1 bits  */
+#define XWT_CSRX_EWDT2_MASK 0x00000001 /* Enable bit 2 */
+
+/* SelfTest constants */
+#define XWT_MAX_SELFTEST_LOOP_COUNT 0x00010000
+#define XWT_TIMER_FAILED            0xFFFFFFFF
+
+#define WATCHDOG_NAME     "Xilinx Watchdog"
+#define PFX WATCHDOG_NAME ": "
+
+struct xwdt_device {
+	struct resource  res;
+	void __iomem *base;
+	u32 nowayout;
+	u32 wdt_interval;
+	u32 boot_status;
+};
+
+static struct xwdt_device xdev;
+
+static  u32 timeout;
+static  u32 control_status_reg;
+static  u8  expect_close;
+static  u8  no_timeout;
+static unsigned long driver_open;
+
+static  DEFINE_SPINLOCK(spinlock);
+
+static void xwdt_start(void)
+{
+	spin_lock(&spinlock);
+
+	/* Clean previous status and enable the watchdog timer */
+	control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+	control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+
+	iowrite32((control_status_reg | XWT_CSR0_EWDT1_MASK),
+				xdev.base + XWT_TWCSR0_OFFSET);
+
+	iowrite32(XWT_CSRX_EWDT2_MASK, xdev.base + XWT_TWCSR1_OFFSET);
+
+	spin_unlock(&spinlock);
+}
+
+static void xwdt_stop(void)
+{
+	spin_lock(&spinlock);
+
+	control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+
+	iowrite32((control_status_reg & ~XWT_CSR0_EWDT1_MASK),
+				xdev.base + XWT_TWCSR0_OFFSET);
+
+	iowrite32(0, xdev.base + XWT_TWCSR1_OFFSET);
+
+	spin_unlock(&spinlock);
+	printk(KERN_INFO PFX "Stopped!\n");
+}
+
+static void xwdt_keepalive(void)
+{
+	spin_lock(&spinlock);
+
+	control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+	control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+	iowrite32(control_status_reg, xdev.base + XWT_TWCSR0_OFFSET);
+
+	spin_unlock(&spinlock);
+}
+
+static void xwdt_get_status(int *status)
+{
+	int new_status;
+
+	spin_lock(&spinlock);
+
+	control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+	new_status = ((control_status_reg &
+			(XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK)) != 0);
+	spin_unlock(&spinlock);
+
+	*status = 0;
+	if (new_status & 1)
+		*status |= WDIOF_CARDRESET;
+}
+
+static u32 xwdt_selftest(void)
+{
+	int i;
+	u32 timer_value1;
+	u32 timer_value2;
+
+	spin_lock(&spinlock);
+
+	timer_value1 = ioread32(xdev.base + XWT_TBR_OFFSET);
+	timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+
+	for (i = 0;
+		((i <= XWT_MAX_SELFTEST_LOOP_COUNT) &&
+			(timer_value2 == timer_value1)); i++) {
+		timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+	}
+
+	spin_unlock(&spinlock);
+
+	if (timer_value2 != timer_value1)
+		return ~XWT_TIMER_FAILED;
+	else
+		return XWT_TIMER_FAILED;
+}
+
+static int xwdt_open(struct inode *inode, struct file *file)
+{
+	/* Only one process can handle the wdt at a time */
+	if (test_and_set_bit(0, &driver_open))
+		return -EBUSY;
+
+	/* Make sure that the module stays loaded while the watchdog runs */
+	if (xdev.nowayout)
+		__module_get(THIS_MODULE);
+
+	xwdt_start();
+	printk(KERN_INFO PFX "Started...\n");
+
+	return nonseekable_open(inode, file);
+}
+
+static int xwdt_release(struct inode *inode, struct file *file)
+{
+	if (expect_close == 42) {
+		xwdt_stop();
+	} else {
+		printk(KERN_CRIT PFX
+			"Unexpected close, not stopping watchdog!\n");
+		xwdt_keepalive();
+	}
+
+	clear_bit(0, &driver_open);
+	expect_close = 0;
+	return 0;
+}
+
+/*
+ *      xwdt_write:
+ *      @file: file handle to the watchdog
+ *      @buf: buffer to write (unused, as the data does not matter here)
+ *      @len: length of the data
+ *      @ppos: pointer to the position to write. No seeks allowed
+ *
+ *      A write to a watchdog device is defined as a keepalive signal. Any
+ *      write of data will do, as we don't define content meaning.
+ */
+static ssize_t xwdt_write(struct file *file, const char __user *buf,
+						size_t len, loff_t *ppos)
+{
+	if (len) {
+		if (!xdev.nowayout) {
+			size_t i;
+
+			/* In case it was set long ago */
+			expect_close = 0;
+
+			for (i = 0; i != len; i++) {
+				char c;
+
+				if (get_user(c, buf + i))
+					return -EFAULT;
+				if (c == 'V')
+					expect_close = 42;
+			}
+		}
+		xwdt_keepalive();
+	}
+	return len;
+}
+
+static const struct watchdog_info ident = {
+	.options =  WDIOF_MAGICCLOSE |
+		    WDIOF_KEEPALIVEPING,
+	.firmware_version =	1,
+	.identity =	WATCHDOG_NAME,
+};
+
+/*
+ *      xwdt_ioctl:
+ *      @file: file handle to the device
+ *      @cmd: watchdog command
+ *      @arg: argument pointer
+ *
+ *      The watchdog API defines a common set of functions for all watchdogs
+ *      according to their available features.
+ */
+static long xwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int status;
+
+	union {
+		struct watchdog_info __user *ident;
+		int __user *i;
+	} uarg;
+
+	uarg.i = (int __user *)arg;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		return copy_to_user(uarg.ident, &ident,
+					sizeof(ident)) ? -EFAULT : 0;
+
+	case WDIOC_GETBOOTSTATUS:
+		return put_user(xdev.boot_status, uarg.i);
+
+	case WDIOC_GETSTATUS:
+		xwdt_get_status(&status);
+		return put_user(status, uarg.i);
+
+	case WDIOC_KEEPALIVE:
+		xwdt_keepalive();
+		return 0;
+
+	case WDIOC_GETTIMEOUT:
+		if (no_timeout)
+			return -ENOTTY;
+		else
+			return put_user(timeout, uarg.i);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations xwdt_fops = {
+	.owner      = THIS_MODULE,
+	.llseek     = no_llseek,
+	.write      = xwdt_write,
+	.open       = xwdt_open,
+	.release    = xwdt_release,
+	.unlocked_ioctl = xwdt_ioctl,
+};
+
+static struct miscdevice xwdt_miscdev = {
+	.minor      = WATCHDOG_MINOR,
+	.name       = "watchdog",
+	.fops       = &xwdt_fops,
+};
+
+static int __devinit xwdt_probe(struct platform_device *pdev)
+{
+	int rc;
+	u32 *tmptr;
+	u32 *pfreq;
+
+	no_timeout = 0;
+
+	pfreq = (u32 *)of_get_property(pdev->dev.of_node->parent,
+					"clock-frequency", NULL);
+
+	if (pfreq == NULL) {
+		printk(KERN_WARNING PFX
+			"The watchdog clock frequency cannot be obtained!\n");
+		no_timeout = 1;
+	}
+
+	rc = of_address_to_resource(pdev->dev.of_node, 0, &xdev.res);
+	if (rc) {
+		printk(KERN_WARNING PFX "invalid address!\n");
+		return rc;
+	}
+
+	tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+					"xlnx,wdt-interval", NULL);
+	if (tmptr == NULL) {
+		printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-interval\""
+					" not found in device tree!\n");
+		no_timeout = 1;
+	} else {
+		xdev.wdt_interval = *tmptr;
+	}
+
+	tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+					"xlnx,wdt-enable-once", NULL);
+	if (tmptr == NULL) {
+		printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-enable-once\""
+					" not found in device tree!\n");
+		xdev.nowayout = WATCHDOG_NOWAYOUT;
+	}
+
+/*
+ * Twice 2^wdt_interval / freq, because the first wdt overflow only
+ * raises an interrupt and is ignored; the reset is generated at the
+ * second wdt overflow.
+ */
+	if (!no_timeout)
+		timeout = 2 * ((1<<xdev.wdt_interval) / *pfreq);
+
+	if (!request_mem_region(xdev.res.start, resource_size(&xdev.res),
+							WATCHDOG_NAME)) {
+		rc = -ENXIO;
+		printk(KERN_ERR PFX "memory request failure!\n");
+		goto err_out;
+	}
+
+	xdev.base = ioremap(xdev.res.start, resource_size(&xdev.res));
+	if (xdev.base == NULL) {
+		rc = -ENOMEM;
+		printk(KERN_ERR PFX "ioremap failure!\n");
+		goto release_mem;
+	}
+
+	rc = xwdt_selftest();
+	if (rc == XWT_TIMER_FAILED) {
+		printk(KERN_ERR PFX "SelfTest routine error!\n");
+		goto unmap_io;
+	}
+
+	xwdt_get_status(&xdev.boot_status);
+
+	rc = misc_register(&xwdt_miscdev);
+	if (rc) {
+		printk(KERN_ERR PFX
+			"cannot register miscdev on minor=%d (err=%d)\n",
+						xwdt_miscdev.minor, rc);
+		goto unmap_io;
+	}
+
+	if (no_timeout)
+		printk(KERN_INFO PFX
+			"driver loaded (timeout=? sec, nowayout=%d)\n",
+						    xdev.nowayout);
+	else
+		printk(KERN_INFO PFX
+			"driver loaded (timeout=%d sec, nowayout=%d)\n",
+					timeout, xdev.nowayout);
+
+	expect_close = 0;
+	clear_bit(0, &driver_open);
+
+	return 0;
+
+unmap_io:
+	iounmap(xdev.base);
+release_mem:
+	release_mem_region(xdev.res.start, resource_size(&xdev.res));
+err_out:
+	return rc;
+}
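To make the timeout formula in the probe above concrete, here is a worked example; both device-tree values are assumed purely for illustration:

	/*
	 *   clock-frequency   = 50000000   (50 MHz timebase)
	 *   xlnx,wdt-interval = 30
	 *
	 *   timeout = 2 * ((1 << 30) / 50000000)
	 *           = 2 * (1073741824 / 50000000)
	 *           = 2 * 21                     (integer division)
	 *           = 42 seconds
	 */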
+
+static int __devexit xwdt_remove(struct platform_device *dev)
+{
+	misc_deregister(&xwdt_miscdev);
+	iounmap(xdev.base);
+	release_mem_region(xdev.res.start, resource_size(&xdev.res));
+
+	return 0;
+}
+
+/* Match table for of_platform binding */
+static struct of_device_id __devinitdata xwdt_of_match[] = {
+	{ .compatible = "xlnx,xps-timebase-wdt-1.01.a", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xwdt_of_match);
+
+static struct platform_driver xwdt_driver = {
+	.probe       = xwdt_probe,
+	.remove      = __devexit_p(xwdt_remove),
+	.driver = {
+		.owner = THIS_MODULE,
+		.name  = WATCHDOG_NAME,
+		.of_match_table = xwdt_of_match,
+	},
+};
+
+static int __init xwdt_init(void)
+{
+	return platform_driver_register(&xwdt_driver);
+}
+
+static void __exit xwdt_exit(void)
+{
+	platform_driver_unregister(&xwdt_driver);
+}
+
+module_init(xwdt_init);
+module_exit(xwdt_exit);
+
+MODULE_AUTHOR("Alejandro Cabrera <aldaya@gmail.com>");
+MODULE_DESCRIPTION("Xilinx Watchdog driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c
index b7c1390..e78d899 100644
--- a/drivers/watchdog/pc87413_wdt.c
+++ b/drivers/watchdog/pc87413_wdt.c
@@ -56,6 +56,7 @@
 #define IO_DEFAULT	0x2E		/* Address used on Portwell Boards */
 
 static int io = IO_DEFAULT;
+static int swc_base_addr = -1;
 
 static int timeout = DEFAULT_TIMEOUT;	/* timeout value */
 static unsigned long timer_enabled;	/* is the timer enabled? */
@@ -116,9 +117,8 @@
 
 /* Read SWC I/O base address */
 
-static inline unsigned int pc87413_get_swc_base(void)
+static void pc87413_get_swc_base_addr(void)
 {
-	unsigned int  swc_base_addr = 0;
 	unsigned char addr_l, addr_h = 0;
 
 	/* Step 3: Read SWC I/O Base Address */
@@ -136,12 +136,11 @@
 		"Read SWC I/O Base Address: low %d, high %d, res %d\n",
 						addr_l, addr_h, swc_base_addr);
 #endif
-	return swc_base_addr;
 }
 
 /* Select Bank 3 of SWC */
 
-static inline void pc87413_swc_bank3(unsigned int swc_base_addr)
+static inline void pc87413_swc_bank3(void)
 {
 	/* Step 4: Select Bank3 of SWC */
 	outb_p(inb(swc_base_addr + 0x0f) | 0x03, swc_base_addr + 0x0f);
@@ -152,8 +151,7 @@
 
 /* Set watchdog timeout to x minutes */
 
-static inline void pc87413_programm_wdto(unsigned int swc_base_addr,
-					 char pc87413_time)
+static inline void pc87413_programm_wdto(char pc87413_time)
 {
 	/* Step 5: Programm WDTO, Twd. */
 	outb_p(pc87413_time, swc_base_addr + WDTO);
@@ -164,7 +162,7 @@
 
 /* Enable WDEN */
 
-static inline void pc87413_enable_wden(unsigned int swc_base_addr)
+static inline void pc87413_enable_wden(void)
 {
 	/* Step 6: Enable WDEN */
 	outb_p(inb(swc_base_addr + WDCTL) | 0x01, swc_base_addr + WDCTL);
@@ -174,7 +172,7 @@
 }
 
 /* Enable SW_WD_TREN */
-static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr)
+static inline void pc87413_enable_sw_wd_tren(void)
 {
 	/* Enable SW_WD_TREN */
 	outb_p(inb(swc_base_addr + WDCFG) | 0x80, swc_base_addr + WDCFG);
@@ -185,7 +183,7 @@
 
 /* Disable SW_WD_TREN */
 
-static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr)
+static inline void pc87413_disable_sw_wd_tren(void)
 {
 	/* Disable SW_WD_TREN */
 	outb_p(inb(swc_base_addr + WDCFG) & 0x7f, swc_base_addr + WDCFG);
@@ -196,7 +194,7 @@
 
 /* Enable SW_WD_TRG */
 
-static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr)
+static inline void pc87413_enable_sw_wd_trg(void)
 {
 	/* Enable SW_WD_TRG */
 	outb_p(inb(swc_base_addr + WDCTL) | 0x80, swc_base_addr + WDCTL);
@@ -207,7 +205,7 @@
 
 /* Disable SW_WD_TRG */
 
-static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr)
+static inline void pc87413_disable_sw_wd_trg(void)
 {
 	/* Disable SW_WD_TRG */
 	outb_p(inb(swc_base_addr + WDCTL) & 0x7f, swc_base_addr + WDCTL);
@@ -222,18 +220,13 @@
 
 static void pc87413_enable(void)
 {
-	unsigned int swc_base_addr;
-
 	spin_lock(&io_lock);
 
-	pc87413_select_wdt_out();
-	pc87413_enable_swc();
-	swc_base_addr = pc87413_get_swc_base();
-	pc87413_swc_bank3(swc_base_addr);
-	pc87413_programm_wdto(swc_base_addr, timeout);
-	pc87413_enable_wden(swc_base_addr);
-	pc87413_enable_sw_wd_tren(swc_base_addr);
-	pc87413_enable_sw_wd_trg(swc_base_addr);
+	pc87413_swc_bank3();
+	pc87413_programm_wdto(timeout);
+	pc87413_enable_wden();
+	pc87413_enable_sw_wd_tren();
+	pc87413_enable_sw_wd_trg();
 
 	spin_unlock(&io_lock);
 }
@@ -242,17 +235,12 @@
 
 static void pc87413_disable(void)
 {
-	unsigned int swc_base_addr;
-
 	spin_lock(&io_lock);
 
-	pc87413_select_wdt_out();
-	pc87413_enable_swc();
-	swc_base_addr = pc87413_get_swc_base();
-	pc87413_swc_bank3(swc_base_addr);
-	pc87413_disable_sw_wd_tren(swc_base_addr);
-	pc87413_disable_sw_wd_trg(swc_base_addr);
-	pc87413_programm_wdto(swc_base_addr, 0);
+	pc87413_swc_bank3();
+	pc87413_disable_sw_wd_tren();
+	pc87413_disable_sw_wd_trg();
+	pc87413_programm_wdto(0);
 
 	spin_unlock(&io_lock);
 }
@@ -261,20 +249,15 @@
 
 static void pc87413_refresh(void)
 {
-	unsigned int swc_base_addr;
-
 	spin_lock(&io_lock);
 
-	pc87413_select_wdt_out();
-	pc87413_enable_swc();
-	swc_base_addr = pc87413_get_swc_base();
-	pc87413_swc_bank3(swc_base_addr);
-	pc87413_disable_sw_wd_tren(swc_base_addr);
-	pc87413_disable_sw_wd_trg(swc_base_addr);
-	pc87413_programm_wdto(swc_base_addr, timeout);
-	pc87413_enable_wden(swc_base_addr);
-	pc87413_enable_sw_wd_tren(swc_base_addr);
-	pc87413_enable_sw_wd_trg(swc_base_addr);
+	pc87413_swc_bank3();
+	pc87413_disable_sw_wd_tren();
+	pc87413_disable_sw_wd_trg();
+	pc87413_programm_wdto(timeout);
+	pc87413_enable_wden();
+	pc87413_enable_sw_wd_tren();
+	pc87413_enable_sw_wd_trg();
 
 	spin_unlock(&io_lock);
 }
@@ -528,7 +511,8 @@
 	printk(KERN_INFO PFX "Version " VERSION " at io 0x%X\n",
 							WDT_INDEX_IO_PORT);
 
-	/* request_region(io, 2, "pc87413"); */
+	if (!request_muxed_region(io, 2, MODNAME))
+		return -EBUSY;
 
 	ret = register_reboot_notifier(&pc87413_notifier);
 	if (ret != 0) {
@@ -541,12 +525,32 @@
 		printk(KERN_ERR PFX
 			"cannot register miscdev on minor=%d (err=%d)\n",
 			WATCHDOG_MINOR, ret);
-		unregister_reboot_notifier(&pc87413_notifier);
-		return ret;
+		goto reboot_unreg;
 	}
 	printk(KERN_INFO PFX "initialized. timeout=%d min \n", timeout);
+
+	pc87413_select_wdt_out();
+	pc87413_enable_swc();
+	pc87413_get_swc_base_addr();
+
+	if (!request_region(swc_base_addr, 0x20, MODNAME)) {
+		printk(KERN_ERR PFX
+			"cannot request SWC region at 0x%x\n", swc_base_addr);
+		ret = -EBUSY;
+		goto misc_unreg;
+	}
+
 	pc87413_enable();
+
+	release_region(io, 2);
 	return 0;
+
+misc_unreg:
+	misc_deregister(&pc87413_miscdev);
+reboot_unreg:
+	unregister_reboot_notifier(&pc87413_notifier);
+	release_region(io, 2);
+	return ret;
 }
 
 /**
@@ -569,7 +573,7 @@
 
 	misc_deregister(&pc87413_miscdev);
 	unregister_reboot_notifier(&pc87413_notifier);
-	/* release_region(io, 2); */
+	release_region(swc_base_addr, 0x20);
 
 	printk(KERN_INFO MODNAME " watchdog component driver removed.\n");
 }
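The reworked init/exit above splits I/O ownership in two: the SuperI/O index/data pair at io is shared with other function drivers, so it is only held (muxed) across configuration, while the SWC register block is claimed for the driver's lifetime. In outline:

	/*
	 * Resource lifetimes after this change:
	 *   init: request_muxed_region(io, 2)         - config ports, briefly
	 *         request_region(swc_base_addr, 0x20)  - SWC block, kept
	 *         release_region(io, 2)                - config ports freed
	 *   exit: release_region(swc_base_addr, 0x20)
	 */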
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index f7f5aa0..30da88f 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -589,6 +589,15 @@
 #define s3c2410wdt_resume  NULL
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_OF
+static const struct of_device_id s3c2410_wdt_match[] = {
+	{ .compatible = "samsung,s3c2410-wdt" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
+#else
+#define s3c2410_wdt_match NULL
+#endif
 
 static struct platform_driver s3c2410wdt_driver = {
 	.probe		= s3c2410wdt_probe,
@@ -599,6 +608,7 @@
 	.driver		= {
 		.owner	= THIS_MODULE,
 		.name	= "s3c2410-wdt",
+		.of_match_table	= s3c2410_wdt_match,
 	},
 };
 
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index c7cf4b0..029467e 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -472,15 +472,10 @@
 	sch311x_wdt_stop();
 }
 
-#define sch311x_wdt_suspend NULL
-#define sch311x_wdt_resume  NULL
-
 static struct platform_driver sch311x_wdt_driver = {
 	.probe		= sch311x_wdt_probe,
 	.remove		= __devexit_p(sch311x_wdt_remove),
 	.shutdown	= sch311x_wdt_shutdown,
-	.suspend	= sch311x_wdt_suspend,
-	.resume		= sch311x_wdt_resume,
 	.driver		= {
 		.owner = THIS_MODULE,
 		.name = DRV_NAME,
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index 0d80e08..cc2cfbe 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -134,6 +134,8 @@
 	writel(INT_ENABLE | RESET_ENABLE, wdt->base + WDTCONTROL);
 	writel(LOCK, wdt->base + WDTLOCK);
 
+	/* Flush posted writes. */
+	readl(wdt->base + WDTLOCK);
 	spin_unlock(&wdt->lock);
 }
 
@@ -144,9 +146,10 @@
 
 	writel(UNLOCK, wdt->base + WDTLOCK);
 	writel(0, wdt->base + WDTCONTROL);
-	writel(0, wdt->base + WDTLOAD);
 	writel(LOCK, wdt->base + WDTLOCK);
 
+	/* Flush posted writes. */
+	readl(wdt->base + WDTLOCK);
 	spin_unlock(&wdt->lock);
 }
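The added read-backs close a posted-write window: on buses that post MMIO writes, writel() can return before the data reaches the device, so dropping the spinlock right after the last writel() could let another CPU start a new register sequence while the old one is still in flight. A readl() from the same device forces the preceding writes out. The general shape, as a sketch:

	writel(val, base + REG);	/* may sit in a bus write buffer */
	(void)readl(base + REG);	/* read-back flushes posted writes */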
 
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
new file mode 100644
index 0000000..cfa1a15
--- /dev/null
+++ b/drivers/watchdog/watchdog_core.c
@@ -0,0 +1,111 @@
+/*
+ *	watchdog_core.c
+ *
+ *	(c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ *						All Rights Reserved.
+ *
+ *	(c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *	This source code is part of the generic code that can be used
+ *	by all the watchdog timer drivers.
+ *
+ *	Based on source code of the following authors:
+ *	  Matt Domsch <Matt_Domsch@dell.com>,
+ *	  Rob Radez <rob@osinvestor.com>,
+ *	  Rusty Lynch <rusty@linux.co.intel.com>
+ *	  Satyam Sharma <satyam@infradead.org>
+ *	  Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ *	admit liability nor provide warranty for any of this software.
+ *	This material is provided "AS-IS" and at no charge.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>	/* For EXPORT_SYMBOL/module stuff/... */
+#include <linux/types.h>	/* For standard types */
+#include <linux/errno.h>	/* For the -ENODEV/... values */
+#include <linux/kernel.h>	/* For printk/panic/... */
+#include <linux/watchdog.h>	/* For watchdog specific items */
+#include <linux/init.h>		/* For __init/__exit/... */
+
+#include "watchdog_dev.h"	/* For watchdog_dev_register/... */
+
+/**
+ * watchdog_register_device() - register a watchdog device
+ * @wdd: watchdog device
+ *
+ * Register a watchdog device with the kernel so that the
+ * watchdog timer can be accessed from userspace.
+ *
+ * A zero is returned on success and a negative errno code for
+ * failure.
+ */
+int watchdog_register_device(struct watchdog_device *wdd)
+{
+	int ret;
+
+	if (wdd == NULL || wdd->info == NULL || wdd->ops == NULL)
+		return -EINVAL;
+
+	/* Mandatory operations need to be supported */
+	if (wdd->ops->start == NULL || wdd->ops->stop == NULL)
+		return -EINVAL;
+
+	/*
+	 * Check that we have valid min and max timeout values, if
+	 * not reset them both to 0 (=not used or unknown)
+	 */
+	if (wdd->min_timeout > wdd->max_timeout) {
+		pr_info("Invalid min and max timeout values, resetting to 0!\n");
+		wdd->min_timeout = 0;
+		wdd->max_timeout = 0;
+	}
+
+	/*
+	 * Note: now that all watchdog_device data has been verified, we
+	 * will not check this anymore in other functions. If data gets
+	 * corrupted in a later stage then we expect a kernel panic!
+	 */
+
+	/* We only support 1 watchdog device via the /dev/watchdog interface */
+	ret = watchdog_dev_register(wdd);
+	if (ret) {
+		pr_err("error registering /dev/watchdog (err=%d).\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(watchdog_register_device);
+
+/**
+ * watchdog_unregister_device() - unregister a watchdog device
+ * @wdd: watchdog device to unregister
+ *
+ * Unregister a watchdog device that was previously successfully
+ * registered with watchdog_register_device().
+ */
+void watchdog_unregister_device(struct watchdog_device *wdd)
+{
+	int ret;
+
+	if (wdd == NULL)
+		return;
+
+	ret = watchdog_dev_unregister(wdd);
+	if (ret)
+		pr_err("error unregistering /dev/watchdog (err=%d).\n", ret);
+}
+EXPORT_SYMBOL_GPL(watchdog_unregister_device);
+
+MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>");
+MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>");
+MODULE_DESCRIPTION("WatchDog Timer Driver Core");
+MODULE_LICENSE("GPL");
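To see how a driver plugs into this new core, here is a minimal, self-contained sketch built only from the interface visible in this patch (start and stop are the mandatory ops, info is required); the toy_* names and the 30-second timeout are invented:

	#include <linux/module.h>
	#include <linux/watchdog.h>

	static int toy_start(struct watchdog_device *wdd)
	{
		/* kick the hardware here */
		return 0;
	}

	static int toy_stop(struct watchdog_device *wdd)
	{
		/* halt the hardware here */
		return 0;
	}

	static const struct watchdog_info toy_info = {
		.options  = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
		.identity = "toy_wdt",
	};

	static const struct watchdog_ops toy_ops = {
		.owner = THIS_MODULE,
		.start = toy_start,
		.stop  = toy_stop,
	};

	static struct watchdog_device toy_wdd = {
		.info    = &toy_info,
		.ops     = &toy_ops,
		.timeout = 30,
	};

	static int __init toy_init(void)
	{
		return watchdog_register_device(&toy_wdd);
	}

	static void __exit toy_exit(void)
	{
		watchdog_unregister_device(&toy_wdd);
	}

	module_init(toy_init);
	module_exit(toy_exit);
	MODULE_LICENSE("GPL");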
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
new file mode 100644
index 0000000..d33520d
--- /dev/null
+++ b/drivers/watchdog/watchdog_dev.c
@@ -0,0 +1,395 @@
+/*
+ *	watchdog_dev.c
+ *
+ *	(c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ *						All Rights Reserved.
+ *
+ *	(c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *
+ *	This source code is part of the generic code that can be used
+ *	by all the watchdog timer drivers.
+ *
+ *	This part of the generic code takes care of the following
+ *	misc device: /dev/watchdog.
+ *
+ *	Based on source code of the following authors:
+ *	  Matt Domsch <Matt_Domsch@dell.com>,
+ *	  Rob Radez <rob@osinvestor.com>,
+ *	  Rusty Lynch <rusty@linux.co.intel.com>
+ *	  Satyam Sharma <satyam@infradead.org>
+ *	  Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ *	admit liability nor provide warranty for any of this software.
+ *	This material is provided "AS-IS" and at no charge.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>	/* For module stuff/... */
+#include <linux/types.h>	/* For standard types (like size_t) */
+#include <linux/errno.h>	/* For the -ENODEV/... values */
+#include <linux/kernel.h>	/* For printk/panic/... */
+#include <linux/fs.h>		/* For file operations */
+#include <linux/watchdog.h>	/* For watchdog specific items */
+#include <linux/miscdevice.h>	/* For handling misc devices */
+#include <linux/init.h>		/* For __init/__exit/... */
+#include <linux/uaccess.h>	/* For copy_to_user/put_user/... */
+
+/* make sure we only register one /dev/watchdog device */
+static unsigned long watchdog_dev_busy;
+/* the watchdog device behind /dev/watchdog */
+static struct watchdog_device *wdd;
+
+/*
+ *	watchdog_ping: ping the watchdog.
+ *	@wddev: the watchdog device to ping
+ *
+ *	If the watchdog has no ping operation of its own, it is
+ *	restarted via the start operation. This wrapper function does
+ *	exactly that.
+ *	We only ping when the watchdog device is running.
+ */
+
+static int watchdog_ping(struct watchdog_device *wddev)
+{
+	if (test_bit(WDOG_ACTIVE, &wdd->status)) {
+		if (wddev->ops->ping)
+			return wddev->ops->ping(wddev);  /* ping the watchdog */
+		else
+			return wddev->ops->start(wddev); /* restart watchdog */
+	}
+	return 0;
+}
+
+/*
+ *	watchdog_start: wrapper to start the watchdog.
+ *	@wddev: the watchdog device to start
+ *
+ *	Start the watchdog if it is not active and mark it active.
+ *	This function returns zero on success or a negative errno code for
+ *	failure.
+ */
+
+static int watchdog_start(struct watchdog_device *wddev)
+{
+	int err;
+
+	if (!test_bit(WDOG_ACTIVE, &wdd->status)) {
+		err = wddev->ops->start(wddev);
+		if (err < 0)
+			return err;
+
+		set_bit(WDOG_ACTIVE, &wdd->status);
+	}
+	return 0;
+}
+
+/*
+ *	watchdog_stop: wrapper to stop the watchdog.
+ *	@wddev: the watchdog device to stop
+ *
+ *	Stop the watchdog if it is still active and unmark it active.
+ *	This function returns zero on success or a negative errno code for
+ *	failure.
+ *	If the 'nowayout' feature was set, the watchdog cannot be stopped.
+ */
+
+static int watchdog_stop(struct watchdog_device *wddev)
+{
+	int err = -EBUSY;
+
+	if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) {
+		pr_info("%s: nowayout prevents watchdog to be stopped!\n",
+							wdd->info->identity);
+		return err;
+	}
+
+	if (test_bit(WDOG_ACTIVE, &wdd->status)) {
+		err = wddev->ops->stop(wddev);
+		if (err < 0)
+			return err;
+
+		clear_bit(WDOG_ACTIVE, &wdd->status);
+	}
+	return 0;
+}
+
+/*
+ *	watchdog_write: writes to the watchdog.
+ *	@file: file from VFS
+ *	@data: user address of data
+ *	@len: length of data
+ *	@ppos: pointer to the file offset
+ *
+ *	A write to a watchdog device is defined as a keepalive ping.
+ *	Writing the magic 'V' sequence allows the next close to turn
+ *	off the watchdog (if 'nowayout' is not set).
+ */
+
+static ssize_t watchdog_write(struct file *file, const char __user *data,
+						size_t len, loff_t *ppos)
+{
+	size_t i;
+	char c;
+
+	if (len == 0)
+		return 0;
+
+	/*
+	 * Note: just in case someone wrote the magic character
+	 * five months ago...
+	 */
+	clear_bit(WDOG_ALLOW_RELEASE, &wdd->status);
+
+	/* scan to see whether or not we got the magic character */
+	for (i = 0; i != len; i++) {
+		if (get_user(c, data + i))
+			return -EFAULT;
+		if (c == 'V')
+			set_bit(WDOG_ALLOW_RELEASE, &wdd->status);
+	}
+
+	/* someone wrote to us, so we send the watchdog a keepalive ping */
+	watchdog_ping(wdd);
+
+	return len;
+}
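From userspace, the write semantics above boil down to: any write is a keepalive, and a 'V' anywhere in the buffer arms the magic close. A minimal client, for illustration only:

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/watchdog", O_WRONLY);

		if (fd < 0)
			return 1;

		write(fd, "k", 1);	/* any byte counts as a keepalive ping */

		write(fd, "V", 1);	/* arm the magic close ... */
		close(fd);		/* ... so this close may stop the dog */
		return 0;
	}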
+
+/*
+ *	watchdog_ioctl: handle the different ioctl's for the watchdog device.
+ *	@file: file handle to the device
+ *	@cmd: watchdog command
+ *	@arg: argument pointer
+ *
+ *	The watchdog API defines a common set of functions for all watchdogs
+ *	according to their available features.
+ */
+
+static long watchdog_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int __user *p = argp;
+	unsigned int val;
+	int err;
+
+	if (wdd->ops->ioctl) {
+		err = wdd->ops->ioctl(wdd, cmd, arg);
+		if (err != -ENOIOCTLCMD)
+			return err;
+	}
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		return copy_to_user(argp, wdd->info,
+			sizeof(struct watchdog_info)) ? -EFAULT : 0;
+	case WDIOC_GETSTATUS:
+		val = wdd->ops->status ? wdd->ops->status(wdd) : 0;
+		return put_user(val, p);
+	case WDIOC_GETBOOTSTATUS:
+		return put_user(wdd->bootstatus, p);
+	case WDIOC_SETOPTIONS:
+		if (get_user(val, p))
+			return -EFAULT;
+		if (val & WDIOS_DISABLECARD) {
+			err = watchdog_stop(wdd);
+			if (err < 0)
+				return err;
+		}
+		if (val & WDIOS_ENABLECARD) {
+			err = watchdog_start(wdd);
+			if (err < 0)
+				return err;
+		}
+		return 0;
+	case WDIOC_KEEPALIVE:
+		if (!(wdd->info->options & WDIOF_KEEPALIVEPING))
+			return -EOPNOTSUPP;
+		watchdog_ping(wdd);
+		return 0;
+	case WDIOC_SETTIMEOUT:
+		if ((wdd->ops->set_timeout == NULL) ||
+		    !(wdd->info->options & WDIOF_SETTIMEOUT))
+			return -EOPNOTSUPP;
+		if (get_user(val, p))
+			return -EFAULT;
+		if ((wdd->max_timeout != 0) &&
+		    (val < wdd->min_timeout || val > wdd->max_timeout))
+				return -EINVAL;
+		err = wdd->ops->set_timeout(wdd, val);
+		if (err < 0)
+			return err;
+		wdd->timeout = val;
+		/* If the watchdog is active then we send a keepalive ping
+		 * to make sure that the watchdog keeps running (and if
+		 * possible that it picks up the new timeout) */
+		watchdog_ping(wdd);
+		/* Fall through */
+	case WDIOC_GETTIMEOUT:
+		/* timeout == 0 means that we don't know the timeout */
+		if (wdd->timeout == 0)
+			return -EOPNOTSUPP;
+		return put_user(wdd->timeout, p);
+	default:
+		return -ENOTTY;
+	}
+}
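+
+/*
+ *	Illustrative userspace sketch (not part of this patch): driving
+ *	the ioctl interface above. The WDIOC_* commands and struct
+ *	watchdog_info come from <linux/watchdog.h>; error handling is
+ *	omitted for brevity.
+ *
+ *	    struct watchdog_info info;
+ *	    int timeout = 30;
+ *
+ *	    ioctl(fd, WDIOC_GETSUPPORT, &info);
+ *	    if (info.options & WDIOF_SETTIMEOUT)
+ *	            ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
+ *	    ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
+ *	    ioctl(fd, WDIOC_KEEPALIVE, NULL);
+ */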
+
+/*
+ *	watchdog_open: open the /dev/watchdog device.
+ *	@inode: inode of device
+ *	@file: file handle to device
+ *
+ *	When the /dev/watchdog device gets opened, we start the watchdog.
+ *	Watch out: the /dev/watchdog device is single open, so we make sure
+ *	it can only be opened once.
+ */
+
+static int watchdog_open(struct inode *inode, struct file *file)
+{
+	int err = -EBUSY;
+
+	/* the watchdog is single open! */
+	if (test_and_set_bit(WDOG_DEV_OPEN, &wdd->status))
+		return -EBUSY;
+
+	/*
+	 * If the /dev/watchdog device is open, we don't want the module
+	 * to be unloaded.
+	 */
+	if (!try_module_get(wdd->ops->owner))
+		goto out;
+
+	err = watchdog_start(wdd);
+	if (err < 0)
+		goto out_mod;
+
+	/* /dev/watchdog is a virtual (and thus non-seekable) device */
+	return nonseekable_open(inode, file);
+
+out_mod:
+	module_put(wdd->ops->owner);
+out:
+	clear_bit(WDOG_DEV_OPEN, &wdd->status);
+	return err;
+}
+
+/*
+ *	watchdog_release: release the /dev/watchdog device.
+ *	@inode: inode of device
+ *	@file: file handle to device
+ *
+ *	This is the code for when /dev/watchdog gets closed. We will only
+ *	stop the watchdog when we have received the magic char (and nowayout
+ *	was not set), else the watchdog will keep running.
+ */
+
+static int watchdog_release(struct inode *inode, struct file *file)
+{
+	int err = -EBUSY;
+
+	/*
+	 * We only stop the watchdog if we received the magic character
+	 * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then
+	 * watchdog_stop will fail.
+	 */
+	if (test_and_clear_bit(WDOG_ALLOW_RELEASE, &wdd->status) ||
+	    !(wdd->info->options & WDIOF_MAGICCLOSE))
+		err = watchdog_stop(wdd);
+
+	/* If the watchdog was not stopped, send a keepalive ping */
+	if (err < 0) {
+		pr_crit("%s: watchdog did not stop!\n", wdd->info->identity);
+		watchdog_ping(wdd);
+	}
+
+	/* Allow the owner module to be unloaded again */
+	module_put(wdd->ops->owner);
+
+	/* make sure that /dev/watchdog can be re-opened */
+	clear_bit(WDOG_DEV_OPEN, &wdd->status);
+
+	return 0;
+}
+
+static const struct file_operations watchdog_fops = {
+	.owner		= THIS_MODULE,
+	.write		= watchdog_write,
+	.unlocked_ioctl	= watchdog_ioctl,
+	.open		= watchdog_open,
+	.release	= watchdog_release,
+};
+
+static struct miscdevice watchdog_miscdev = {
+	.minor		= WATCHDOG_MINOR,
+	.name		= "watchdog",
+	.fops		= &watchdog_fops,
+};
+
+/*
+ *	watchdog_dev_register:
+ *	@watchdog: watchdog device
+ *
+ *	Register a watchdog device as /dev/watchdog. /dev/watchdog
+ *	is actually a miscdevice and thus we set it up like that.
+ */
+
+int watchdog_dev_register(struct watchdog_device *watchdog)
+{
+	int err;
+
+	/* Only one device can register for /dev/watchdog */
+	if (test_and_set_bit(0, &watchdog_dev_busy)) {
+		pr_err("only one watchdog can use /dev/watchdog.\n");
+		return -EBUSY;
+	}
+
+	wdd = watchdog;
+
+	err = misc_register(&watchdog_miscdev);
+	if (err != 0) {
+		pr_err("%s: cannot register miscdev on minor=%d (err=%d).\n",
+			watchdog->info->identity, WATCHDOG_MINOR, err);
+		goto out;
+	}
+
+	return 0;
+
+out:
+	wdd = NULL;
+	clear_bit(0, &watchdog_dev_busy);
+	return err;
+}
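+
+/*
+ *	Illustrative driver-side sketch (not part of this patch): the
+ *	minimum a watchdog driver must provide before the core can hand
+ *	its device to watchdog_dev_register(). The demo_* names below
+ *	are made-up placeholders.
+ *
+ *	    static const struct watchdog_info demo_info = {
+ *	            .options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+ *	                        WDIOF_MAGICCLOSE,
+ *	            .identity = "demo_wdt",
+ *	    };
+ *
+ *	    static const struct watchdog_ops demo_ops = {
+ *	            .owner = THIS_MODULE,
+ *	            .start = demo_start,
+ *	            .stop  = demo_stop,
+ *	    };
+ *
+ *	    static struct watchdog_device demo_wdd = {
+ *	            .info = &demo_info,
+ *	            .ops  = &demo_ops,
+ *	    };
+ *
+ *	    err = watchdog_dev_register(&demo_wdd);
+ */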
+
+/*
+ *	watchdog_dev_unregister:
+ *	@watchdog: watchdog device
+ *
+ *	Deregister the /dev/watchdog device.
+ */
+
+int watchdog_dev_unregister(struct watchdog_device *watchdog)
+{
+	/* Check that a watchdog device was registered in the past */
+	if (!test_bit(0, &watchdog_dev_busy) || !wdd)
+		return -ENODEV;
+
+	/* We can only unregister the watchdog device that was registered */
+	if (watchdog != wdd) {
+		pr_err("%s: watchdog was not registered as /dev/watchdog.\n",
+			watchdog->info->identity);
+		return -ENODEV;
+	}
+
+	misc_deregister(&watchdog_miscdev);
+	wdd = NULL;
+	clear_bit(0, &watchdog_dev_busy);
+	return 0;
+}
diff --git a/drivers/watchdog/watchdog_dev.h b/drivers/watchdog/watchdog_dev.h
new file mode 100644
index 0000000..bc7612b
--- /dev/null
+++ b/drivers/watchdog/watchdog_dev.h
@@ -0,0 +1,33 @@
+/*
+ *	watchdog_dev.h
+ *
+ *	(c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ *						All Rights Reserved.
+ *
+ *	(c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *	This source code is part of the generic code that can be used
+ *	by all the watchdog timer drivers.
+ *
+ *	Based on source code of the following authors:
+ *	  Matt Domsch <Matt_Domsch@dell.com>,
+ *	  Rob Radez <rob@osinvestor.com>,
+ *	  Rusty Lynch <rusty@linux.co.intel.com>
+ *	  Satyam Sharma <satyam@infradead.org>
+ *	  Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ *	admit liability nor provide warranty for any of this software.
+ *	This material is provided "AS-IS" and at no charge.
+ */
+
+/*
+ *	Functions/procedures to be called by the core
+ */
+int watchdog_dev_register(struct watchdog_device *);
+int watchdog_dev_unregister(struct watchdog_device *);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4d433d3..f11e43e 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -187,7 +187,7 @@
  */
 static struct inode *anon_inode_mkinode(void)
 {
-	struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+	struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
 
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca..502b9e9 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@
 	 */
 	struct btrfs_key location;
 
+	/* Lock for counters */
+	spinlock_t lock;
+
 	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	atomic_t outstanding_extents;
-	atomic_t reserved_extents;
+	unsigned outstanding_extents;
+	unsigned reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+				       struct inode *inode)
+{
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+		return true;
+	return false;
+}
+
 #endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e66786..011cab3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@
 {
 	int i;
 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_set_lock_blocking(p->nodes[i]);
+		if (!p->nodes[i] || !p->locks[i])
+			continue;
+		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+		if (p->locks[i] == BTRFS_READ_LOCK)
+			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+		else if (p->locks[i] == BTRFS_WRITE_LOCK)
+			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
 	}
 }
 
@@ -68,7 +73,7 @@
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-					struct extent_buffer *held)
+					struct extent_buffer *held, int held_rw)
 {
 	int i;
 
@@ -79,19 +84,29 @@
 	 * really sure by forcing the path to blocking before we clear
 	 * the path blocking.
 	 */
-	if (held)
-		btrfs_set_lock_blocking(held);
+	if (held) {
+		btrfs_set_lock_blocking_rw(held, held_rw);
+		if (held_rw == BTRFS_WRITE_LOCK)
+			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+		else if (held_rw == BTRFS_READ_LOCK)
+			held_rw = BTRFS_READ_LOCK_BLOCKING;
+	}
 	btrfs_set_path_blocking(p);
 #endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_clear_lock_blocking(p->nodes[i]);
+		if (p->nodes[i] && p->locks[i]) {
+			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_WRITE_LOCK;
+			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_READ_LOCK;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
-		btrfs_clear_lock_blocking(held);
+		btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@
 		if (!p->nodes[i])
 			continue;
 		if (p->locks[i]) {
-			btrfs_tree_unlock(p->nodes[i]);
+			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
 			p->locks[i] = 0;
 		}
 		free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@
 	return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
 
-		if (!parent->map_token) {
-			map_extent_buffer(parent,
-					btrfs_node_key_ptr_offset(i),
-					sizeof(struct btrfs_key_ptr),
-					&parent->map_token, &parent->kaddr,
-					&parent->map_start, &parent->map_len,
-					KM_USER1);
-		}
 		btrfs_node_key(parent, &disk_key, i);
 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
 			continue;
@@ -656,11 +682,6 @@
 			last_block = blocknr;
 			continue;
 		}
-		if (parent->map_token) {
-			unmap_extent_buffer(parent, parent->map_token,
-					    KM_USER1);
-			parent->map_token = NULL;
-		}
 
 		cur = btrfs_find_tree_block(root, blocknr, blocksize);
 		if (cur)
@@ -701,11 +722,6 @@
 		btrfs_tree_unlock(cur);
 		free_extent_buffer(cur);
 	}
-	if (parent->map_token) {
-		unmap_extent_buffer(parent, parent->map_token,
-				    KM_USER1);
-		parent->map_token = NULL;
-	}
 	return err;
 }
 
@@ -746,7 +762,6 @@
 	struct btrfs_disk_key *tmp = NULL;
 	struct btrfs_disk_key unaligned;
 	unsigned long offset;
-	char *map_token = NULL;
 	char *kaddr = NULL;
 	unsigned long map_start = 0;
 	unsigned long map_len = 0;
@@ -756,18 +771,13 @@
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
 
-		if (!map_token || offset < map_start ||
+		if (!kaddr || offset < map_start ||
 		    (offset + sizeof(struct btrfs_disk_key)) >
 		    map_start + map_len) {
-			if (map_token) {
-				unmap_extent_buffer(eb, map_token, KM_USER0);
-				map_token = NULL;
-			}
 
 			err = map_private_extent_buffer(eb, offset,
 						sizeof(struct btrfs_disk_key),
-						&map_token, &kaddr,
-						&map_start, &map_len, KM_USER0);
+						&kaddr, &map_start, &map_len);
 
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@
 			high = mid;
 		else {
 			*slot = mid;
-			if (map_token)
-				unmap_extent_buffer(eb, map_token, KM_USER0);
 			return 0;
 		}
 	}
 	*slot = low;
-	if (map_token)
-		unmap_extent_buffer(eb, map_token, KM_USER0);
 	return 1;
 }
 
@@ -890,7 +896,8 @@
 
 	mid = path->nodes[level];
 
-	WARN_ON(!path->locks[level]);
+	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
-	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1250,19 +1256,8 @@
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
-	if (node->map_token || path->skip_locking)
-		map = false;
 
 	while (1) {
-		if (map && !node->map_token) {
-			unsigned long offset = btrfs_node_key_ptr_offset(nr);
-			map_private_extent_buffer(node, offset,
-						  sizeof(struct btrfs_key_ptr),
-						  &node->map_token,
-						  &node->kaddr,
-						  &node->map_start,
-						  &node->map_len, KM_USER1);
-		}
 		if (direction < 0) {
 			if (nr == 0)
 				break;
@@ -1281,11 +1276,6 @@
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (map && node->map_token) {
-				unmap_extent_buffer(node, node->map_token,
-						    KM_USER1);
-				node->map_token = NULL;
-			}
 			readahead_tree_block(root, search, blocksize, gen);
 			nread += blocksize;
 		}
@@ -1293,10 +1283,6 @@
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (map && node->map_token) {
-		unmap_extent_buffer(node, node->map_token, KM_USER1);
-		node->map_token = NULL;
-	}
 }
 
 /*
@@ -1409,7 +1395,7 @@
 
 		t = path->nodes[i];
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-			btrfs_tree_unlock(t);
+			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 		}
 	}
@@ -1436,7 +1422,7 @@
 			continue;
 		if (!path->locks[i])
 			continue;
-		btrfs_tree_unlock(path->nodes[i]);
+		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 		path->locks[i] = 0;
 	}
 }
@@ -1485,6 +1471,8 @@
 			 * we can trust our generation number
 			 */
 			free_extent_buffer(tmp);
+			btrfs_set_path_blocking(p);
+
 			tmp = read_tree_block(root, blocknr, blocksize, gen);
 			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
 				*eb_ret = tmp;
@@ -1540,20 +1528,27 @@
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *p,
-		       struct extent_buffer *b, int level, int ins_len)
+		       struct extent_buffer *b, int level, int ins_len,
+		       int *write_lock_level)
 {
 	int ret;
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = split_node(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		BUG_ON(sret > 0);
 		if (sret) {
@@ -1565,13 +1560,19 @@
 		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = balance_level(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		if (sret) {
 			ret = sret;
@@ -1615,27 +1616,78 @@
 	int err;
 	int level;
 	int lowest_unlock = 1;
+	int root_lock;
+	/* everything at write_lock_level or lower must be write locked */
+	int write_lock_level = 0;
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 
-	if (ins_len < 0)
+	if (ins_len < 0) {
 		lowest_unlock = 2;
 
+		/* when we are removing items, we might have to go up to level
+		 * two as we update tree pointers.  Make sure we keep write
+		 * locks for those levels as well.
+		 */
+		write_lock_level = 2;
+	} else if (ins_len > 0) {
+		/*
+		 * for inserting items, make sure we have a write lock on
+		 * level 1 so we can update keys
+		 */
+		write_lock_level = 1;
+	}
+
+	if (!cow)
+		write_lock_level = -1;
+
+	if (cow && (p->keep_locks || p->lowest_level))
+		write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+	/*
+	 * we try very hard to do read locks on the root
+	 */
+	root_lock = BTRFS_READ_LOCK;
+	level = 0;
 	if (p->search_commit_root) {
+		/*
+		 * the commit roots are read only
+		 * so we always do read locks
+		 */
 		b = root->commit_root;
 		extent_buffer_get(b);
+		level = btrfs_header_level(b);
 		if (!p->skip_locking)
-			btrfs_tree_lock(b);
+			btrfs_tree_read_lock(b);
 	} else {
-		if (p->skip_locking)
+		if (p->skip_locking) {
 			b = btrfs_root_node(root);
-		else
-			b = btrfs_lock_root_node(root);
+			level = btrfs_header_level(b);
+		} else {
+			/* we don't know the level of the root node
+			 * until we actually have it read locked
+			 */
+			b = btrfs_read_lock_root_node(root);
+			level = btrfs_header_level(b);
+			if (level <= write_lock_level) {
+				/* whoops, must trade for write lock */
+				btrfs_tree_read_unlock(b);
+				free_extent_buffer(b);
+				b = btrfs_lock_root_node(root);
+				root_lock = BTRFS_WRITE_LOCK;
+
+				/* the level might have changed, check again */
+				level = btrfs_header_level(b);
+			}
+		}
 	}
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@
 		 * setup the path here so we can release it under lock
 		 * contention with the cow code
 		 */
-		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
 		if (cow) {
 			/*
 			 * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@
 
 			btrfs_set_path_blocking(p);
 
+			/*
+			 * must have write locks on this node and the
+			 * parent
+			 */
+			if (level + 1 > write_lock_level) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@
 		BUG_ON(!cow && ins_len);
 
 		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		/*
 		 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@
 			}
 			p->slots[level] = slot;
 			err = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
+					     ins_len, &write_lock_level);
 			if (err == -EAGAIN)
 				goto again;
 			if (err) {
@@ -1710,6 +1765,19 @@
 			b = p->nodes[level];
 			slot = p->slots[level];
 
+			/*
+			 * slot 0 is special, if we change the key
+			 * we have to update the parent pointer
+			 * which means we must have a write lock
+			 * on the parent
+			 */
+			if (slot == 0 && cow &&
+			    write_lock_level < level + 1) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
@@ -1728,23 +1796,42 @@
 			}
 
 			if (!p->skip_locking) {
-				btrfs_clear_path_blocking(p, NULL);
-				err = btrfs_try_spin_lock(b);
-
-				if (!err) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p, b);
+				level = btrfs_header_level(b);
+				if (level <= write_lock_level) {
+					err = btrfs_try_tree_write_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_WRITE_LOCK);
+					}
+					p->locks[level] = BTRFS_WRITE_LOCK;
+				} else {
+					err = btrfs_try_tree_read_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_read_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_READ_LOCK);
+					}
+					p->locks[level] = BTRFS_READ_LOCK;
 				}
+				p->nodes[level] = b;
 			}
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
+				if (write_lock_level < 1) {
+					write_lock_level = 1;
+					btrfs_release_path(p);
+					goto again;
+				}
+
 				btrfs_set_path_blocking(p);
 				err = split_leaf(trans, root, key,
 						 p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p, NULL);
+				btrfs_clear_path_blocking(p, NULL, 0);
 
 				BUG_ON(err > 0);
 				if (err) {
@@ -2025,7 +2112,7 @@
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -2253,14 +2340,6 @@
 		if (path->slots[0] == i)
 			push_space += data_size;
 
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
-
 		this_item_size = btrfs_item_size(left, item);
 		if (this_item_size + sizeof(*item) + push_space > free_space)
 			break;
@@ -2271,10 +2350,6 @@
 			break;
 		i--;
 	}
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	if (push_items == 0)
 		goto out_unlock;
@@ -2316,21 +2391,10 @@
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 		push_space -= btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 	left_nritems -= push_items;
 	btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 
 		if (!empty && push_items > 0) {
 			if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@
 		push_space += this_item_size + sizeof(*item);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	if (push_items == 0) {
 		ret = 1;
 		goto out;
@@ -2530,23 +2582,12 @@
 		u32 ioff;
 
 		item = btrfs_item_nr(left, i);
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
 
 		ioff = btrfs_item_offset(left, item);
 		btrfs_set_item_offset(left, item,
 		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	/* fixup right node */
 	if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		push_space = push_space - btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 
 	btrfs_mark_buffer_dirty(left);
 	if (right_nritems)
@@ -2729,23 +2758,10 @@
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(right, item);
 		btrfs_set_item_offset(right, item, ioff + rt_data_off);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	btrfs_set_header_nritems(l, mid);
 	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff + size_diff);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	if (from_end) {
 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - data_size);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
 		      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@
 		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
 		 */
 		/* first correct the data pointers */
-		WARN_ON(leaf->map_token);
 		for (i = slot; i < nritems; i++) {
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
-
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff - total_data);
 		}
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		/* shift the items */
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@
 		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
 		 */
 		/* first correct the data pointers */
-		WARN_ON(leaf->map_token);
 		for (i = slot; i < nritems; i++) {
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
-
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff - total_data);
 		}
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		/* shift the items */
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff + dsize);
 		}
 
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
 			      btrfs_item_nr_offset(slot + nr),
 			      sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@
 
 	WARN_ON(!path->keep_locks);
 again:
-	cur = btrfs_lock_root_node(root);
+	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
 	WARN_ON(path->nodes[level]);
 	path->nodes[level] = cur;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_READ_LOCK;
 
 	if (btrfs_header_generation(cur) < min_trans) {
 		ret = 1;
@@ -4098,12 +4049,12 @@
 		cur = read_node_slot(root, cur, slot);
 		BUG_ON(!cur);
 
-		btrfs_tree_lock(cur);
+		btrfs_tree_read_lock(cur);
 
-		path->locks[level - 1] = 1;
+		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
 		unlock_up(path, level, 1);
-		btrfs_clear_path_blocking(path, NULL);
+		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
 	if (ret == 0)
@@ -4218,30 +4169,21 @@
 	u32 nritems;
 	int ret;
 	int old_spinning = path->leave_spinning;
-	int force_blocking = 0;
+	int next_rw_lock = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
 		return 1;
 
-	/*
-	 * we take the blocks in an order that upsets lockdep.  Using
-	 * blocking mode is the only way around it.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	force_blocking = 1;
-#endif
-
 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
 	level = 1;
 	next = NULL;
+	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
-
-	if (!force_blocking)
-		path->leave_spinning = 1;
+	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
@@ -4281,11 +4223,12 @@
 		}
 
 		if (next) {
-			btrfs_tree_unlock(next);
+			btrfs_tree_unlock_rw(next, next_rw_lock);
 			free_extent_buffer(next);
 		}
 
 		next = c;
+		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(NULL, root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@
 		}
 
 		if (!path->skip_locking) {
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 		break;
 	}
@@ -4314,14 +4256,13 @@
 		level--;
 		c = path->nodes[level];
 		if (path->locks[level])
-			btrfs_tree_unlock(c);
+			btrfs_tree_unlock_rw(c, path->locks[level]);
 
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = 1;
-
+			path->locks[level] = next_rw_lock;
 		if (!level)
 			break;
 
@@ -4336,16 +4277,14 @@
 		}
 
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(path->nodes[level]);
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 	}
 	ret = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe9287b..365c4e1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -755,6 +755,8 @@
 				   chunks for this space */
 	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
 
+	unsigned int flush:1;		/* set if we are trying to make space */
+
 	unsigned int force_alloc;	/* set if we need to force a chunk
 					   alloc for this space */
 
@@ -764,7 +766,7 @@
 	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
-	atomic_t caching_threads;
+	wait_queue_head_t wait;
 };
 
 struct btrfs_block_rsv {
@@ -824,6 +826,7 @@
 	struct list_head list;
 	struct mutex mutex;
 	wait_queue_head_t wait;
+	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
 	atomic_t count;
@@ -1032,6 +1035,8 @@
 	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
+	struct btrfs_workers caching_workers;
+
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
 	 * the cow mechanism and make them safe to write.  It happens
@@ -2128,7 +2133,7 @@
 
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-						 int num_items)
+						 unsigned num_items)
 {
 	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
 		3 * num_items;
@@ -2222,9 +2227,6 @@
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2330,7 +2332,7 @@
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_clear_path_blocking(struct btrfs_path *p,
-			       struct extent_buffer *held);
+			       struct extent_buffer *held, int held_rw);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 98c68e6..b52c672 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -735,7 +735,7 @@
 	}
 
 	/* reset all the locked nodes in the patch to spinning locks. */
-	btrfs_clear_path_blocking(path, NULL);
+	btrfs_clear_path_blocking(path, NULL, 0);
 
 	/* insert the keys of the items */
 	ret = setup_items_for_insert(trans, root, path, keys, data_size,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 685f259..c360a84 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -89,13 +89,8 @@
 	data_size = sizeof(*dir_item) + name_len + data_len;
 	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
 					name, name_len);
-	/*
-	 * FIXME: at some point we should handle xattr's that are larger than
-	 * what we can fit in our leaf.  We set location to NULL b/c we arent
-	 * pointing at anything else, that will change if we store the xattr
-	 * data in a separate inode.
-	 */
-	BUG_ON(IS_ERR(dir_item));
+	if (IS_ERR(dir_item))
+		return PTR_ERR(dir_item);
 	memset(&location, 0, sizeof(location));
 
 	leaf = path->nodes[0];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b231ae1..07b3ac6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -100,38 +100,83 @@
 	struct btrfs_work work;
 };
 
-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root.  For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
  *
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Different roots are used for different purposes, may nest inside each
+ * other, and therefore require separate keysets.  As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid.  This ensures that all special purpose roots
+ * have separate keysets.
  *
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock.  As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
  *
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked.  It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
+ *
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # if BTRFS_MAX_LEVEL != 8
 #  error
 # endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
-	/* leaf */
-	"btrfs-extent-00",
-	"btrfs-extent-01",
-	"btrfs-extent-02",
-	"btrfs-extent-03",
-	"btrfs-extent-04",
-	"btrfs-extent-05",
-	"btrfs-extent-06",
-	"btrfs-extent-07",
-	/* highest possible level */
-	"btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+	u64			id;		/* root objectid */
+	const char		*name_stem;	/* lock name stem */
+	char			names[BTRFS_MAX_LEVEL + 1][20];
+	struct lock_class_key	keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+	{ .id = BTRFS_ROOT_TREE_OBJECTID,	.name_stem = "root"	},
+	{ .id = BTRFS_EXTENT_TREE_OBJECTID,	.name_stem = "extent"	},
+	{ .id = BTRFS_CHUNK_TREE_OBJECTID,	.name_stem = "chunk"	},
+	{ .id = BTRFS_DEV_TREE_OBJECTID,	.name_stem = "dev"	},
+	{ .id = BTRFS_FS_TREE_OBJECTID,		.name_stem = "fs"	},
+	{ .id = BTRFS_CSUM_TREE_OBJECTID,	.name_stem = "csum"	},
+	{ .id = BTRFS_ORPHAN_OBJECTID,		.name_stem = "orphan"	},
+	{ .id = BTRFS_TREE_LOG_OBJECTID,	.name_stem = "log"	},
+	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},
+	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	},
+	{ .id = 0,				.name_stem = "tree"	},
 };
+
+void __init btrfs_init_lockdep(void)
+{
+	int i, j;
+
+	/* initialize lockdep class names */
+	for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+		struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+		for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+			snprintf(ks->names[j], sizeof(ks->names[j]),
+				 "btrfs-%s-%02d", ks->name_stem, j);
+	}
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+				    int level)
+{
+	struct btrfs_lockdep_keyset *ks;
+
+	BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+	/* find the matching keyset, id 0 is the default entry */
+	for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+		if (ks->id == objectid)
+			break;
+
+	lockdep_set_class_and_name(&eb->lock,
+				   &ks->keys[level], ks->names[level]);
+}
+
 #endif
 
 /*
@@ -217,7 +262,6 @@
 	unsigned long len;
 	unsigned long cur_len;
 	unsigned long offset = BTRFS_CSUM_SIZE;
-	char *map_token = NULL;
 	char *kaddr;
 	unsigned long map_start;
 	unsigned long map_len;
@@ -228,8 +272,7 @@
 	len = buf->len - offset;
 	while (len > 0) {
 		err = map_private_extent_buffer(buf, offset, 32,
-					&map_token, &kaddr,
-					&map_start, &map_len, KM_USER0);
+					&kaddr, &map_start, &map_len);
 		if (err)
 			return 1;
 		cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@
 				      crc, cur_len);
 		len -= cur_len;
 		offset += cur_len;
-		unmap_extent_buffer(buf, map_token, KM_USER0);
 	}
 	if (csum_size > sizeof(inline_result)) {
 		result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
-	lockdep_set_class_and_name(&eb->lock,
-			   &btrfs_eb_class[level],
-			   btrfs_eb_name[level]);
-}
-#endif
-
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 			       struct extent_state *state)
 {
@@ -553,7 +586,8 @@
 	}
 	found_level = btrfs_header_level(eb);
 
-	btrfs_set_buffer_lockdep_class(eb, found_level);
+	btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+				       eb, found_level);
 
 	ret = csum_tree_block(root, eb, 1);
 	if (ret) {
@@ -1598,7 +1632,7 @@
 		goto fail_bdi;
 	}
 
-	fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 	INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1802,6 +1836,9 @@
 			   fs_info->thread_pool_size),
 			   &fs_info->generic_worker);
 
+	btrfs_init_workers(&fs_info->caching_workers, "cache",
+			   2, &fs_info->generic_worker);
+
 	/* a higher idle thresh on the submit workers makes it much more
 	 * likely that bios will be send down in a sane order to the
 	 * devices
@@ -1855,6 +1892,7 @@
 	btrfs_start_workers(&fs_info->endio_write_workers, 1);
 	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
 	btrfs_start_workers(&fs_info->delayed_workers, 1);
+	btrfs_start_workers(&fs_info->caching_workers, 1);
 
 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2112,6 +2150,7 @@
 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 	kfree(fs_info->delayed_root);
 fail_iput:
@@ -2577,6 +2616,7 @@
 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);
 
 	btrfs_close_devices(fs_info->fs_devices);
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a0b610a..bec3ea4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -87,10 +87,14 @@
 
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+void btrfs_init_lockdep(void);
+void btrfs_set_buffer_lockdep_class(u64 objectid,
+			            struct extent_buffer *eb, int level);
 #else
-static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
-						 int level)
+static inline void btrfs_init_lockdep(void)
+{ }
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+					struct extent_buffer *eb, int level)
 {
 }
 #endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71cd456..4d08ed7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@
 	return total_added;
 }
 
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-	struct btrfs_block_group_cache *block_group = data;
-	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_root *extent_root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -334,9 +334,14 @@
 	u32 nritems;
 	int ret = 0;
 
+	caching_ctl = container_of(work, struct btrfs_caching_control, work);
+	block_group = caching_ctl->block_group;
+	fs_info = block_group->fs_info;
+	extent_root = fs_info->extent_root;
+
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		goto out;
 
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
@@ -433,13 +438,11 @@
 	free_excluded_extents(extent_root, block_group);
 
 	mutex_unlock(&caching_ctl->mutex);
+out:
 	wake_up(&caching_ctl->wait);
 
 	put_caching_control(caching_ctl);
-	atomic_dec(&block_group->space_info->caching_threads);
 	btrfs_put_block_group(block_group);
-
-	return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct task_struct *tsk;
 	int ret = 0;
 
 	smp_mb();
@@ -501,6 +503,7 @@
 	caching_ctl->progress = cache->key.objectid;
 	/* one for caching kthread, one for caching block group list */
 	atomic_set(&caching_ctl->count, 2);
+	caching_ctl->work.func = caching_thread;
 
 	spin_lock(&cache->lock);
 	if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->extent_commit_sem);
 
-	atomic_inc(&cache->space_info->caching_threads);
 	btrfs_get_block_group(cache);
 
-	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-			  cache->key.objectid);
-	if (IS_ERR(tsk)) {
-		ret = PTR_ERR(tsk);
-		printk(KERN_ERR "error running thread %d\n", ret);
-		BUG();
-	}
+	btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
 
 	return ret;
 }
@@ -2932,9 +2928,10 @@
 	found->full = 0;
 	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
 	found->chunk_alloc = 0;
+	found->flush = 0;
+	init_waitqueue_head(&found->wait);
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
-	atomic_set(&found->caching_threads, 0);
 	return 0;
 }
 
@@ -3314,6 +3311,14 @@
 	if (reserved == 0)
 		return 0;
 
+	smp_mb();
+	if (root->fs_info->delalloc_bytes == 0) {
+		if (trans)
+			return 0;
+		btrfs_wait_ordered_extents(root, 0, 0);
+		return 0;
+	}
+
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (loops < 1024) {
@@ -3356,6 +3361,8 @@
 		}
 
 	}
+	if (reclaimed >= to_reclaim && !trans)
+		btrfs_wait_ordered_extents(root, 0, 0);
 	return reclaimed >= to_reclaim;
 }
 
@@ -3380,15 +3387,36 @@
 	u64 num_bytes = orig_bytes;
 	int retries = 0;
 	int ret = 0;
-	bool reserved = false;
 	bool committed = false;
+	bool flushing = false;
 
 again:
-	ret = -ENOSPC;
-	if (reserved)
-		num_bytes = 0;
-
+	ret = 0;
 	spin_lock(&space_info->lock);
+	/*
+	 * We only want to wait if somebody other than us is flushing and we are
+	 * actually allowed to flush.
+	 */
+	while (flush && !flushing && space_info->flush) {
+		spin_unlock(&space_info->lock);
+		/*
+		 * If we have a trans handle we can't wait because the flusher
+		 * may have to commit the transaction, which would mean we would
+		 * deadlock since we are waiting for the flusher to finish, but
+		 * hold the current transaction open.
+		 */
+		if (trans)
+			return -EAGAIN;
+		ret = wait_event_interruptible(space_info->wait,
+					       !space_info->flush);
+		/* Must have been interrupted, return */
+		if (ret)
+			return -EINTR;
+
+		spin_lock(&space_info->lock);
+	}
+
+	ret = -ENOSPC;
 	unused = space_info->bytes_used + space_info->bytes_reserved +
 		 space_info->bytes_pinned + space_info->bytes_readonly +
 		 space_info->bytes_may_use;
@@ -3403,8 +3431,7 @@
 	if (unused <= space_info->total_bytes) {
 		unused = space_info->total_bytes - unused;
 		if (unused >= num_bytes) {
-			if (!reserved)
-				space_info->bytes_reserved += orig_bytes;
+			space_info->bytes_reserved += orig_bytes;
 			ret = 0;
 		} else {
 			/*
@@ -3429,17 +3456,14 @@
 	 * to reclaim space we can actually use it instead of somebody else
 	 * stealing it from us.
 	 */
-	if (ret && !reserved) {
-		space_info->bytes_reserved += orig_bytes;
-		reserved = true;
+	if (ret && flush) {
+		flushing = true;
+		space_info->flush = 1;
 	}
 
 	spin_unlock(&space_info->lock);
 
-	if (!ret)
-		return 0;
-
-	if (!flush)
+	if (!ret || !flush)
 		goto out;
 
 	/*
@@ -3447,11 +3471,11 @@
 	 * metadata until after the IO is completed.
 	 */
 	ret = shrink_delalloc(trans, root, num_bytes, 1);
-	if (ret > 0)
-		return 0;
-	else if (ret < 0)
+	if (ret < 0)
 		goto out;
 
+	ret = 0;
+
 	/*
 	 * So if we were overcommitted it's possible that somebody else flushed
 	 * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3486,11 @@
 		goto again;
 	}
 
-	spin_lock(&space_info->lock);
 	/*
 	 * Not enough space to be reclaimed, don't bother committing the
 	 * transaction.
 	 */
+	spin_lock(&space_info->lock);
 	if (space_info->bytes_pinned < orig_bytes)
 		ret = -ENOSPC;
 	spin_unlock(&space_info->lock);
@@ -3474,10 +3498,13 @@
 		goto out;
 
 	ret = -EAGAIN;
-	if (trans || committed)
+	if (trans)
 		goto out;
 
 	ret = -ENOSPC;
+	if (committed)
+		goto out;
+
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		goto out;
@@ -3489,12 +3516,12 @@
 	}
 
 out:
-	if (reserved) {
+	if (flushing) {
 		spin_lock(&space_info->lock);
-		space_info->bytes_reserved -= orig_bytes;
+		space_info->flush = 0;
+		wake_up_all(&space_info->wait);
 		spin_unlock(&space_info->lock);
 	}
-
 	return ret;
 }
 
@@ -3704,7 +3731,6 @@
 	if (commit_trans) {
 		if (trans)
 			return -EAGAIN;
-
 		trans = btrfs_join_transaction(root);
 		BUG_ON(IS_ERR(trans));
 		ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3900,6 @@
 	return 0;
 }
 
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 int num_items)
-{
-	u64 num_bytes;
-	int ret;
-
-	if (num_items == 0 || root->fs_info->chunk_root == root)
-		return 0;
-
-	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes);
-	if (!ret) {
-		trans->bytes_reserved += num_bytes;
-		trans->block_rsv = &root->fs_info->trans_block_rsv;
-	}
-	return ret;
-}
-
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root)
 {
@@ -3944,6 +3950,30 @@
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+	unsigned dropped_extents = 0;
+
+	spin_lock(&BTRFS_I(inode)->lock);
+	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+	BTRFS_I(inode)->outstanding_extents--;
+
+	/*
+	 * If we have as many or more outstanding extents as we have
+	 * reserved then we need to leave the reserved extents count alone.
+	 */
+	if (BTRFS_I(inode)->outstanding_extents >=
+	    BTRFS_I(inode)->reserved_extents)
+		goto out;
+
+	dropped_extents = BTRFS_I(inode)->reserved_extents -
+		BTRFS_I(inode)->outstanding_extents;
+	BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+	spin_unlock(&BTRFS_I(inode)->lock);
+	return dropped_extents;
+}
+
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
 	return num_bytes >>= 3;
@@ -3953,9 +3983,8 @@
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-	u64 to_reserve;
-	int nr_extents;
-	int reserved_extents;
+	u64 to_reserve = 0;
+	unsigned nr_extents = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3992,49 @@
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+	spin_lock(&BTRFS_I(inode)->lock);
+	BTRFS_I(inode)->outstanding_extents++;
 
-	if (nr_extents > reserved_extents) {
-		nr_extents -= reserved_extents;
+	if (BTRFS_I(inode)->outstanding_extents >
+	    BTRFS_I(inode)->reserved_extents) {
+		nr_extents = BTRFS_I(inode)->outstanding_extents -
+			BTRFS_I(inode)->reserved_extents;
+		BTRFS_I(inode)->reserved_extents += nr_extents;
+
 		to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-	} else {
-		nr_extents = 0;
-		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
 	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-	if (ret)
+	if (ret) {
+		unsigned dropped;
+		/*
+		 * We don't need the return value since our reservation failed,
+		 * we just need to clean up our counter.
+		 */
+		dropped = drop_outstanding_extent(inode);
+		WARN_ON(dropped > 1);
 		return ret;
-
-	atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+	}
 
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
-	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve, 0);
-
 	return 0;
 }
 
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 to_free;
-	int nr_extents;
-	int reserved_extents;
+	u64 to_free = 0;
+	unsigned dropped;
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-	do {
-		int old, new;
-
-		nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-		if (nr_extents >= reserved_extents) {
-			nr_extents = 0;
-			break;
-		}
-		old = reserved_extents;
-		nr_extents = reserved_extents - nr_extents;
-		new = reserved_extents - nr_extents;
-		old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-				     reserved_extents, new);
-		if (likely(old == reserved_extents))
-			break;
-		reserved_extents = old;
-	} while (1);
+	dropped = drop_outstanding_extent(inode);
 
 	to_free = calc_csum_metadata_size(inode, num_bytes);
-	if (nr_extents > 0)
-		to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+	if (dropped > 0)
+		to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
 				to_free);
@@ -4990,14 +5002,10 @@
 			}
 
 			/*
-			 * We only want to start kthread caching if we are at
-			 * the point where we will wait for caching to make
-			 * progress, or if our ideal search is over and we've
-			 * found somebody to start caching.
+			 * The caching workers are limited to 2 threads, so we
+			 * can queue as much work as we care to.
 			 */
-			if (loop > LOOP_CACHING_NOWAIT ||
-			    (loop > LOOP_FIND_IDEAL &&
-			     atomic_read(&space_info->caching_threads) < 2)) {
+			if (loop > LOOP_FIND_IDEAL) {
 				ret = cache_block_group(block_group, trans,
 							orig_root, 0);
 				BUG_ON(ret);
@@ -5219,8 +5227,7 @@
 		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 			found_uncached_bg = false;
 			loop++;
-			if (!ideal_cache_percent &&
-			    atomic_read(&space_info->caching_threads))
+			if (!ideal_cache_percent)
 				goto search;
 
 			/*
@@ -5623,7 +5630,7 @@
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 	btrfs_set_header_generation(buf, trans->transid);
-	btrfs_set_buffer_lockdep_class(buf, level);
+	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(trans, root, buf);
 
@@ -5910,7 +5917,7 @@
 			return 1;
 
 		if (path->locks[level] && !wc->keep_locks) {
-			btrfs_tree_unlock(eb);
+			btrfs_tree_unlock_rw(eb, path->locks[level]);
 			path->locks[level] = 0;
 		}
 		return 0;
@@ -5934,7 +5941,7 @@
 	 * keep the tree lock
 	 */
 	if (path->locks[level] && level > 0) {
-		btrfs_tree_unlock(eb);
+		btrfs_tree_unlock_rw(eb, path->locks[level]);
 		path->locks[level] = 0;
 	}
 	return 0;
@@ -6047,7 +6054,7 @@
 	BUG_ON(level != btrfs_header_level(next));
 	path->nodes[level] = next;
 	path->slots[level] = 0;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 	wc->level = level;
 	if (wc->level == 1)
 		wc->reada_slot = 0;
@@ -6118,7 +6125,7 @@
 			BUG_ON(level == 0);
 			btrfs_tree_lock(eb);
 			btrfs_set_lock_blocking(eb);
-			path->locks[level] = 1;
+			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
 			ret = btrfs_lookup_extent_info(trans, root,
 						       eb->start, eb->len,
@@ -6127,8 +6134,7 @@
 			BUG_ON(ret);
 			BUG_ON(wc->refs[level] == 0);
 			if (wc->refs[level] == 1) {
-				btrfs_tree_unlock(eb);
-				path->locks[level] = 0;
+				btrfs_tree_unlock_rw(eb, path->locks[level]);
 				return 1;
 			}
 		}
@@ -6150,7 +6156,7 @@
 		    btrfs_header_generation(eb) == trans->transid) {
 			btrfs_tree_lock(eb);
 			btrfs_set_lock_blocking(eb);
-			path->locks[level] = 1;
+			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 		}
 		clean_tree_block(trans, root, eb);
 	}
@@ -6229,7 +6235,8 @@
 				return 0;
 
 			if (path->locks[level]) {
-				btrfs_tree_unlock(path->nodes[level]);
+				btrfs_tree_unlock_rw(path->nodes[level],
+						     path->locks[level]);
 				path->locks[level] = 0;
 			}
 			free_extent_buffer(path->nodes[level]);
@@ -6281,7 +6288,7 @@
 		path->nodes[level] = btrfs_lock_root_node(root);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
-		path->locks[level] = 1;
+		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 		memset(&wc->update_progress, 0,
 		       sizeof(wc->update_progress));
 	} else {
@@ -6449,7 +6456,7 @@
 	level = btrfs_header_level(node);
 	path->nodes[level] = node;
 	path->slots[level] = 0;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
 	wc->refs[parent_level] = 1;
 	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,15 +6531,28 @@
 	return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
 	struct btrfs_space_info *sinfo = cache->space_info;
 	u64 num_bytes;
+	u64 min_allocable_bytes;
 	int ret = -ENOSPC;
 
 	if (cache->ro)
 		return 0;
 
+	/*
+	 * We need some free metadata and system space left over for
+	 * allocating chunks in some corner cases, so don't let this
+	 * block group go read-only unless we're forced to.
+	 */
+	if ((sinfo->flags &
+	     (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+	    !force)
+		min_allocable_bytes = 1 * 1024 * 1024;
+	else
+		min_allocable_bytes = 0;
+
 	spin_lock(&sinfo->lock);
 	spin_lock(&cache->lock);
 	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6540,7 +6560,8 @@
 
 	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
 	    sinfo->bytes_may_use + sinfo->bytes_readonly +
-	    cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+	    cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+	    sinfo->total_bytes) {
 		sinfo->bytes_readonly += num_bytes;
 		sinfo->bytes_reserved += cache->reserved_pinned;
 		cache->reserved_pinned = 0;
@@ -6571,7 +6592,7 @@
 		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 			       CHUNK_ALLOC_FORCE);
 
-	ret = set_block_group_ro(cache);
+	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6600,7 @@
 			     CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
-	ret = set_block_group_ro(cache);
+	ret = set_block_group_ro(cache, 0);
 out:
 	btrfs_end_transaction(trans, root);
 	return ret;
@@ -7016,7 +7037,7 @@
 
 		set_avail_alloc_bits(root->fs_info, cache->flags);
 		if (btrfs_chunk_readonly(root, cache->key.objectid))
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 	}
 
 	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7051,9 @@
 		 * mirrored block groups.
 		 */
 		list_for_each_entry(cache, &space_info->block_groups[3], list)
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 		list_for_each_entry(cache, &space_info->block_groups[4], list)
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 	}
 
 	init_global_block_rsv(info);
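
The new force parameter works together with a headroom requirement: unless the caller forces the transition, a metadata or system block group may only go read-only if 1MB of allocatable space would remain for chunk allocation. A standalone sketch of that check follows (the struct layout and all names are this sketch's, not the kernel's):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Illustrative mirror of the space_info totals the kernel consults. */
struct sinfo_sketch {
	u64 total_bytes;
	u64 bytes_used, bytes_reserved, bytes_pinned;
	u64 bytes_may_use, bytes_readonly;
	int is_meta_or_system;	/* METADATA or SYSTEM flag set */
};

/* Returns 0 if the group may go read-only, -1 for the ENOSPC case. */
static int can_set_ro(const struct sinfo_sketch *s, u64 num_bytes,
		      u64 reserved_pinned, int force)
{
	/* keep 1MB of headroom for chunk allocation unless forced */
	u64 min_allocable = (s->is_meta_or_system && !force)
			    ? 1024 * 1024 : 0;

	if (s->bytes_used + s->bytes_reserved + s->bytes_pinned +
	    s->bytes_may_use + s->bytes_readonly +
	    reserved_pinned + num_bytes + min_allocable <= s->total_bytes)
		return 0;
	return -1;
}

int main(void)
{
	struct sinfo_sketch s = { .total_bytes = 8 << 20,
				  .bytes_used = 7 << 20,
				  .is_meta_or_system = 1 };

	/* fails without force: 1MB left but 1MB headroom is required */
	printf("force=0: %d\n", can_set_ro(&s, 1 << 20, 0, 0));
	printf("force=1: %d\n", can_set_ro(&s, 1 << 20, 0, 1));
	return 0;
}

With force=1, as the mount-time callers above now pass for read-only chunks, the headroom requirement is waived.
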
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 561262d..067b174 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -281,11 +281,10 @@
 		if (other->start == state->end + 1 &&
 		    other->state == state->state) {
 			merge_cb(tree, state, other);
-			other->start = state->start;
-			state->tree = NULL;
-			rb_erase(&state->rb_node, &tree->state);
-			free_extent_state(state);
-			state = NULL;
+			state->end = other->end;
+			other->tree = NULL;
+			rb_erase(&other->rb_node, &tree->state);
+			free_extent_state(other);
 		}
 	}
 
@@ -351,7 +350,6 @@
 		       "%llu %llu\n", (unsigned long long)found->start,
 		       (unsigned long long)found->end,
 		       (unsigned long long)start, (unsigned long long)end);
-		free_extent_state(state);
 		return -EEXIST;
 	}
 	state->tree = tree;
@@ -500,7 +498,8 @@
 			cached_state = NULL;
 		}
 
-		if (cached && cached->tree && cached->start == start) {
+		if (cached && cached->tree && cached->start <= start &&
+		    cached->end > start) {
 			if (clear)
 				atomic_dec(&cached->refs);
 			state = cached;
@@ -742,7 +741,8 @@
 	spin_lock(&tree->lock);
 	if (cached_state && *cached_state) {
 		state = *cached_state;
-		if (state->start == start && state->tree) {
+		if (state->start <= start && state->end > start &&
+		    state->tree) {
 			node = &state->rb_node;
 			goto hit_next;
 		}
@@ -783,13 +783,13 @@
 		if (err)
 			goto out;
 
-		next_node = rb_next(node);
 		cache_state(state, cached_state);
 		merge_state(tree, state);
 		if (last_end == (u64)-1)
 			goto out;
 
 		start = last_end + 1;
+		next_node = rb_next(&state->rb_node);
 		if (next_node && start < end && prealloc && !need_resched()) {
 			state = rb_entry(next_node, struct extent_state,
 					 rb_node);
@@ -862,7 +862,6 @@
 		 * Avoid to free 'prealloc' if it can be merged with
 		 * the later extent.
 		 */
-		atomic_inc(&prealloc->refs);
 		err = insert_state(tree, prealloc, start, this_end,
 				   &bits);
 		BUG_ON(err == -EEXIST);
@@ -872,7 +871,6 @@
 			goto out;
 		}
 		cache_state(prealloc, cached_state);
-		free_extent_state(prealloc);
 		prealloc = NULL;
 		start = this_end + 1;
 		goto search_again;
@@ -1564,7 +1562,8 @@
 	int bitset = 0;
 
 	spin_lock(&tree->lock);
-	if (cached && cached->tree && cached->start == start)
+	if (cached && cached->tree && cached->start <= start &&
+	    cached->end > start)
 		node = &cached->rb_node;
 	else
 		node = tree_search(tree, start);
@@ -2432,6 +2431,7 @@
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
+	int tag;
 
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
@@ -2442,11 +2442,16 @@
 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
 		scanned = 1;
 	}
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
 retry:
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
 	while (!done && !nr_to_write_done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			      PAGECACHE_TAG_DIRTY, min(end - index,
-				  (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
 		unsigned i;
 
 		scanned = 1;
@@ -3020,8 +3025,15 @@
 		return NULL;
 	eb->start = start;
 	eb->len = len;
-	spin_lock_init(&eb->lock);
-	init_waitqueue_head(&eb->lock_wq);
+	rwlock_init(&eb->lock);
+	atomic_set(&eb->write_locks, 0);
+	atomic_set(&eb->read_locks, 0);
+	atomic_set(&eb->blocking_readers, 0);
+	atomic_set(&eb->blocking_writers, 0);
+	atomic_set(&eb->spinning_readers, 0);
+	atomic_set(&eb->spinning_writers, 0);
+	init_waitqueue_head(&eb->write_lock_wq);
+	init_waitqueue_head(&eb->read_lock_wq);
 
 #if LEAK_DEBUG
 	spin_lock_irqsave(&leak_lock, flags);
@@ -3117,7 +3129,7 @@
 		i = 0;
 	}
 	for (; i < num_pages; i++, index++) {
-		p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+		p = find_or_create_page(mapping, index, GFP_NOFS);
 		if (!p) {
 			WARN_ON(1);
 			goto free_eb;
@@ -3264,6 +3276,22 @@
 	return was_dirty;
 }
 
+static int __eb_straddles_pages(u64 start, u64 len)
+{
+	if (len < PAGE_CACHE_SIZE)
+		return 1;
+	if (start & (PAGE_CACHE_SIZE - 1))
+		return 1;
+	if ((start + len) & (PAGE_CACHE_SIZE - 1))
+		return 1;
+	return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+	return __eb_straddles_pages(eb->start, eb->len);
+}
+
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 				struct extent_buffer *eb,
 				struct extent_state **cached_state)
@@ -3275,8 +3303,10 @@
 	num_pages = num_extent_pages(eb->start, eb->len);
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
-	clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			      cached_state, GFP_NOFS);
+	if (eb_straddles_pages(eb)) {
+		clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+				      cached_state, GFP_NOFS);
+	}
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if (page)
@@ -3294,8 +3324,10 @@
 
 	num_pages = num_extent_pages(eb->start, eb->len);
 
-	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    NULL, GFP_NOFS);
+	if (eb_straddles_pages(eb)) {
+		set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+				    NULL, GFP_NOFS);
+	}
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3318,9 +3350,12 @@
 	int uptodate;
 	unsigned long index;
 
-	ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
-	if (ret)
-		return 1;
+	if (__eb_straddles_pages(start, end - start + 1)) {
+		ret = test_range_bit(tree, start, end,
+				     EXTENT_UPTODATE, 1, NULL);
+		if (ret)
+			return 1;
+	}
 	while (start <= end) {
 		index = start >> PAGE_CACHE_SHIFT;
 		page = find_get_page(tree->mapping, index);
@@ -3348,10 +3383,12 @@
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 1;
 
-	ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-			   EXTENT_UPTODATE, 1, cached_state);
-	if (ret)
-		return ret;
+	if (eb_straddles_pages(eb)) {
+		ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+				   EXTENT_UPTODATE, 1, cached_state);
+		if (ret)
+			return ret;
+	}
 
 	num_pages = num_extent_pages(eb->start, eb->len);
 	for (i = 0; i < num_pages; i++) {
@@ -3384,9 +3421,11 @@
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
 
-	if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-			   EXTENT_UPTODATE, 1, NULL)) {
-		return 0;
+	if (eb_straddles_pages(eb)) {
+		if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+				   EXTENT_UPTODATE, 1, NULL)) {
+			return 0;
+		}
 	}
 
 	if (start) {
@@ -3490,9 +3529,8 @@
 		page = extent_buffer_page(eb, i);
 
 		cur = min(len, (PAGE_CACHE_SIZE - offset));
-		kaddr = kmap_atomic(page, KM_USER1);
+		kaddr = page_address(page);
 		memcpy(dst, kaddr + offset, cur);
-		kunmap_atomic(kaddr, KM_USER1);
 
 		dst += cur;
 		len -= cur;
@@ -3502,9 +3540,9 @@
 }
 
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-			       unsigned long min_len, char **token, char **map,
+			       unsigned long min_len, char **map,
 			       unsigned long *map_start,
-			       unsigned long *map_len, int km)
+			       unsigned long *map_len)
 {
 	size_t offset = start & (PAGE_CACHE_SIZE - 1);
 	char *kaddr;
@@ -3534,42 +3572,12 @@
 	}
 
 	p = extent_buffer_page(eb, i);
-	kaddr = kmap_atomic(p, km);
-	*token = kaddr;
+	kaddr = page_address(p);
 	*map = kaddr + offset;
 	*map_len = PAGE_CACHE_SIZE - offset;
 	return 0;
 }
 
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-		      unsigned long min_len,
-		      char **token, char **map,
-		      unsigned long *map_start,
-		      unsigned long *map_len, int km)
-{
-	int err;
-	int save = 0;
-	if (eb->map_token) {
-		unmap_extent_buffer(eb, eb->map_token, km);
-		eb->map_token = NULL;
-		save = 1;
-	}
-	err = map_private_extent_buffer(eb, start, min_len, token, map,
-				       map_start, map_len, km);
-	if (!err && save) {
-		eb->map_token = *token;
-		eb->kaddr = *map;
-		eb->map_start = *map_start;
-		eb->map_len = *map_len;
-	}
-	return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
-	kunmap_atomic(token, km);
-}
-
 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 			  unsigned long start,
 			  unsigned long len)
@@ -3593,9 +3601,8 @@
 
 		cur = min(len, (PAGE_CACHE_SIZE - offset));
 
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
 		ret = memcmp(ptr, kaddr + offset, cur);
-		kunmap_atomic(kaddr, KM_USER0);
 		if (ret)
 			break;
 
@@ -3628,9 +3635,8 @@
 		WARN_ON(!PageUptodate(page));
 
 		cur = min(len, PAGE_CACHE_SIZE - offset);
-		kaddr = kmap_atomic(page, KM_USER1);
+		kaddr = page_address(page);
 		memcpy(kaddr + offset, src, cur);
-		kunmap_atomic(kaddr, KM_USER1);
 
 		src += cur;
 		len -= cur;
@@ -3659,9 +3665,8 @@
 		WARN_ON(!PageUptodate(page));
 
 		cur = min(len, PAGE_CACHE_SIZE - offset);
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
 		memset(kaddr + offset, c, cur);
-		kunmap_atomic(kaddr, KM_USER0);
 
 		len -= cur;
 		offset = 0;
@@ -3692,9 +3697,8 @@
 
 		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
 
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
 		read_extent_buffer(src, kaddr + offset, src_offset, cur);
-		kunmap_atomic(kaddr, KM_USER0);
 
 		src_offset += cur;
 		len -= cur;
@@ -3707,20 +3711,17 @@
 		       unsigned long dst_off, unsigned long src_off,
 		       unsigned long len)
 {
-	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+	char *dst_kaddr = page_address(dst_page);
 	if (dst_page == src_page) {
 		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
 	} else {
-		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+		char *src_kaddr = page_address(src_page);
 		char *p = dst_kaddr + dst_off + len;
 		char *s = src_kaddr + src_off + len;
 
 		while (len--)
 			*--p = *--s;
-
-		kunmap_atomic(src_kaddr, KM_USER1);
 	}
-	kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3733,20 +3734,17 @@
 		       unsigned long dst_off, unsigned long src_off,
 		       unsigned long len)
 {
-	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+	char *dst_kaddr = page_address(dst_page);
 	char *src_kaddr;
 
 	if (dst_page != src_page) {
-		src_kaddr = kmap_atomic(src_page, KM_USER1);
+		src_kaddr = page_address(src_page);
 	} else {
 		src_kaddr = dst_kaddr;
 		BUG_ON(areas_overlap(src_off, dst_off, len));
 	}
 
 	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-	kunmap_atomic(dst_kaddr, KM_USER0);
-	if (dst_page != src_page)
-		kunmap_atomic(src_kaddr, KM_USER1);
 }
 
 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
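
The eb_straddles_pages() helpers added above let extent_io skip the EXTENT_UPTODATE range tracking whenever a buffer covers whole pages, since the per-page uptodate bit is then sufficient. A rough userspace rendering of the alignment test (the 4096-byte page size is an assumption of the sketch):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096UL	/* assumed page size for this sketch */

/* 1 if [start, start+len) shares a page with other data */
static int straddles_pages(uint64_t start, uint64_t len)
{
	if (len < PAGE_SZ)
		return 1;		/* smaller than a page */
	if (start & (PAGE_SZ - 1))
		return 1;		/* unaligned start */
	if ((start + len) & (PAGE_SZ - 1))
		return 1;		/* unaligned end */
	return 0;
}

int main(void)
{
	printf("%d\n", straddles_pages(0, 4096));	/* 0: whole page */
	printf("%d\n", straddles_pages(0, 1024));	/* 1: sub-page */
	printf("%d\n", straddles_pages(512, 8192));	/* 1: unaligned */
	return 0;
}
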
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a11a92e..21a7ca9 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -120,8 +120,6 @@
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
-	char *map_token;
-	char *kaddr;
 	unsigned long map_start;
 	unsigned long map_len;
 	struct page *first_page;
@@ -130,14 +128,26 @@
 	struct rcu_head rcu_head;
 	atomic_t refs;
 
-	/* the spinlock is used to protect most operations */
-	spinlock_t lock;
+	/* counts of lock holders and waiters on the extent buffer */
+	atomic_t write_locks;
+	atomic_t read_locks;
+	atomic_t blocking_writers;
+	atomic_t blocking_readers;
+	atomic_t spinning_readers;
+	atomic_t spinning_writers;
 
-	/*
-	 * when we keep the lock held while blocking, waiters go onto
-	 * the wq
+	/* protects write locks */
+	rwlock_t lock;
+
+	/* readers wait on write_lock_wq for the write
+	 * lock holders to unlock
+	 */
-	wait_queue_head_t lock_wq;
+	wait_queue_head_t write_lock_wq;
+
+	/* writers use read_lock_wq while they wait for readers
+	 * to unlock
+	 */
+	wait_queue_head_t read_lock_wq;
 };
 
 static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -279,15 +289,10 @@
 int extent_buffer_uptodate(struct extent_io_tree *tree,
 			   struct extent_buffer *eb,
 			   struct extent_state *cached_state);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-		      unsigned long min_len, char **token, char **map,
-		      unsigned long *map_start,
-		      unsigned long *map_len, int km);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-		      unsigned long min_len, char **token, char **map,
+		      unsigned long min_len, char **map,
 		      unsigned long *map_start,
-		      unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+		      unsigned long *map_len);
 int extent_range_uptodate(struct extent_io_tree *tree,
 			  u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90d4ee5..08bcfa9 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -177,6 +177,15 @@
 
 	WARN_ON(bio->bi_vcnt <= 0);
 
+	/*
+	 * The free space cache is only read when it hasn't been
+	 * updated in the current transaction, so we can safely
+	 * read from the commit root and sidestep a nasty deadlock
+	 * between reading the free space cache and updating the csum tree.
+	 */
+	if (btrfs_is_free_space_inode(root, inode))
+		path->search_commit_root = 1;
+
 	disk_bytenr = (u64)bio->bi_sector << 9;
 	if (dio)
 		offset = logical_offset;
@@ -664,10 +673,6 @@
 	struct btrfs_sector_sum *sector_sum;
 	u32 nritems;
 	u32 ins_size;
-	char *eb_map;
-	char *eb_token;
-	unsigned long map_len;
-	unsigned long map_start;
 	u16 csum_size =
 		btrfs_super_csum_size(&root->fs_info->super_copy);
 
@@ -814,30 +819,9 @@
 	item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
 	item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
 				      btrfs_item_size_nr(leaf, path->slots[0]));
-	eb_token = NULL;
 next_sector:
 
-	if (!eb_token ||
-	   (unsigned long)item + csum_size >= map_start + map_len) {
-		int err;
-
-		if (eb_token)
-			unmap_extent_buffer(leaf, eb_token, KM_USER1);
-		eb_token = NULL;
-		err = map_private_extent_buffer(leaf, (unsigned long)item,
-						csum_size,
-						&eb_token, &eb_map,
-						&map_start, &map_len, KM_USER1);
-		if (err)
-			eb_token = NULL;
-	}
-	if (eb_token) {
-		memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
-		       &sector_sum->sum, csum_size);
-	} else {
-		write_extent_buffer(leaf, &sector_sum->sum,
-				    (unsigned long)item, csum_size);
-	}
+	write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
 
 	total_bytes += root->sectorsize;
 	sector_sum++;
@@ -850,10 +834,7 @@
 			goto next_sector;
 		}
 	}
-	if (eb_token) {
-		unmap_extent_buffer(leaf, eb_token, KM_USER1);
-		eb_token = NULL;
-	}
+
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	if (total_bytes < sums->len) {
 		btrfs_release_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 59cbdb1..a35e51c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1081,7 +1081,8 @@
 
 again:
 	for (i = 0; i < num_pages; i++) {
-		pages[i] = grab_cache_page(inode->i_mapping, index + i);
+		pages[i] = find_or_create_page(inode->i_mapping, index + i,
+					       GFP_NOFS);
 		if (!pages[i]) {
 			faili = i - 1;
 			err = -ENOMEM;
@@ -1238,9 +1239,11 @@
 		 * managed to copy.
 		 */
 		if (num_pages > dirty_pages) {
-			if (copied > 0)
-				atomic_inc(
-					&BTRFS_I(inode)->outstanding_extents);
+			if (copied > 0) {
+				spin_lock(&BTRFS_I(inode)->lock);
+				BTRFS_I(inode)->outstanding_extents++;
+				spin_unlock(&BTRFS_I(inode)->lock);
+			}
 			btrfs_delalloc_release_space(inode,
 					(num_pages - dirty_pages) <<
 					PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index bf0d615..6377713 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,6 +98,12 @@
 		return inode;
 
 	spin_lock(&block_group->lock);
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
+		printk(KERN_INFO "Old style space inode found, converting.\n");
+		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
+		block_group->disk_cache_state = BTRFS_DC_CLEAR;
+	}
+
 	if (!btrfs_fs_closing(root->fs_info)) {
 		block_group->inode = igrab(inode);
 		block_group->iref = 1;
@@ -135,7 +141,7 @@
 	btrfs_set_inode_gid(leaf, inode_item, 0);
 	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
 	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
-			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+			      BTRFS_INODE_PREALLOC);
 	btrfs_set_inode_nlink(leaf, inode_item, 1);
 	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
 	btrfs_set_inode_block_group(leaf, inode_item, offset);
@@ -239,17 +245,12 @@
 	struct btrfs_free_space_header *header;
 	struct extent_buffer *leaf;
 	struct page *page;
-	u32 *checksums = NULL, *crc;
-	char *disk_crcs = NULL;
 	struct btrfs_key key;
 	struct list_head bitmaps;
 	u64 num_entries;
 	u64 num_bitmaps;
 	u64 generation;
-	u32 cur_crc = ~(u32)0;
 	pgoff_t index = 0;
-	unsigned long first_page_offset;
-	int num_checksums;
 	int ret = 0;
 
 	INIT_LIST_HEAD(&bitmaps);
@@ -292,16 +293,6 @@
 	if (!num_entries)
 		goto out;
 
-	/* Setup everything for doing checksumming */
-	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
-	if (!checksums)
-		goto out;
-	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
-	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
-	if (!disk_crcs)
-		goto out;
-
 	ret = readahead_cache(inode);
 	if (ret)
 		goto out;
@@ -311,18 +302,12 @@
 		struct btrfs_free_space *e;
 		void *addr;
 		unsigned long offset = 0;
-		unsigned long start_offset = 0;
 		int need_loop = 0;
 
 		if (!num_entries && !num_bitmaps)
 			break;
 
-		if (index == 0) {
-			start_offset = first_page_offset;
-			offset = start_offset;
-		}
-
-		page = grab_cache_page(inode->i_mapping, index);
+		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
 		if (!page)
 			goto free_cache;
 
@@ -342,8 +327,15 @@
 		if (index == 0) {
 			u64 *gen;
 
-			memcpy(disk_crcs, addr, first_page_offset);
-			gen = addr + (sizeof(u32) * num_checksums);
+			/*
+			 * A bogus crc sits at the front of the first page so
+			 * that old kernels trying to mount a fs with the new
+			 * format are sure to discard the cache.
+			 */
+			addr += sizeof(u64);
+			offset += sizeof(u64);
+
+			gen = addr;
 			if (*gen != BTRFS_I(inode)->generation) {
 				printk(KERN_ERR "btrfs: space cache generation"
 				       " (%llu) does not match inode (%llu)\n",
@@ -355,24 +347,10 @@
 				page_cache_release(page);
 				goto free_cache;
 			}
-			crc = (u32 *)disk_crcs;
+			addr += sizeof(u64);
+			offset += sizeof(u64);
 		}
-		entry = addr + start_offset;
-
-		/* First lets check our crc before we do anything fun */
-		cur_crc = ~(u32)0;
-		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
-					  PAGE_CACHE_SIZE - start_offset);
-		btrfs_csum_final(cur_crc, (char *)&cur_crc);
-		if (cur_crc != *crc) {
-			printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
-			       index);
-			kunmap(page);
-			unlock_page(page);
-			page_cache_release(page);
-			goto free_cache;
-		}
-		crc++;
+		entry = addr;
 
 		while (1) {
 			if (!num_entries)
@@ -470,8 +448,6 @@
 
 	ret = 1;
 out:
-	kfree(checksums);
-	kfree(disk_crcs);
 	return ret;
 free_cache:
 	__btrfs_remove_free_space_cache(ctl);
@@ -569,8 +545,7 @@
 	struct btrfs_key key;
 	u64 start, end, len;
 	u64 bytes = 0;
-	u32 *crc, *checksums;
-	unsigned long first_page_offset;
+	u32 crc = ~(u32)0;
 	int index = 0, num_pages = 0;
 	int entries = 0;
 	int bitmaps = 0;
@@ -590,34 +565,13 @@
 	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
 		PAGE_CACHE_SHIFT;
 
-	/* Since the first page has all of our checksums and our generation we
-	 * need to calculate the offset into the page that we can start writing
-	 * our entries.
-	 */
-	first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
-
 	filemap_write_and_wait(inode->i_mapping);
 	btrfs_wait_ordered_range(inode, inode->i_size &
 				 ~(root->sectorsize - 1), (u64)-1);
 
-	/* make sure we don't overflow that first page */
-	if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
-		/* this is really the same as running out of space, where we also return 0 */
-		printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
-		ret = 0;
-		goto out_update;
-	}
-
-	/* We need a checksum per page. */
-	crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
-	if (!crc)
-		return -1;
-
 	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
-	if (!pages) {
-		kfree(crc);
+	if (!pages)
 		return -1;
-	}
 
 	/* Get the cluster for this block_group if it exists */
 	if (block_group && !list_empty(&block_group->cluster_list))
@@ -640,7 +594,7 @@
 	 * know and don't freak out.
 	 */
 	while (index < num_pages) {
-		page = grab_cache_page(inode->i_mapping, index);
+		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
 		if (!page) {
 			int i;
 
@@ -648,7 +602,7 @@
 				unlock_page(pages[i]);
 				page_cache_release(pages[i]);
 			}
-			goto out_free;
+			goto out;
 		}
 		pages[index] = page;
 		index++;
@@ -668,17 +622,11 @@
 	/* Write out the extent entries */
 	do {
 		struct btrfs_free_space_entry *entry;
-		void *addr;
+		void *addr, *orig;
 		unsigned long offset = 0;
-		unsigned long start_offset = 0;
 
 		next_page = false;
 
-		if (index == 0) {
-			start_offset = first_page_offset;
-			offset = start_offset;
-		}
-
 		if (index >= num_pages) {
 			out_of_space = true;
 			break;
@@ -686,10 +634,26 @@
 
 		page = pages[index];
 
-		addr = kmap(page);
-		entry = addr + start_offset;
+		orig = addr = kmap(page);
+		if (index == 0) {
+			u64 *gen;
 
-		memset(addr, 0, PAGE_CACHE_SIZE);
+			/*
+			 * We're going to put in a bogus crc for this page so
+			 * that old kernels that aren't aware of this format
+			 * are sure to discard the cache.
+			 */
+			addr += sizeof(u64);
+			offset += sizeof(u64);
+
+			gen = addr;
+			*gen = trans->transid;
+			addr += sizeof(u64);
+			offset += sizeof(u64);
+		}
+		entry = addr;
+
+		memset(addr, 0, PAGE_CACHE_SIZE - offset);
 		while (node && !next_page) {
 			struct btrfs_free_space *e;
 
@@ -752,13 +716,19 @@
 				next_page = true;
 			entry++;
 		}
-		*crc = ~(u32)0;
-		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
-				       PAGE_CACHE_SIZE - start_offset);
-		kunmap(page);
 
-		btrfs_csum_final(*crc, (char *)crc);
-		crc++;
+		/* Generate bogus crc value */
+		if (index == 0) {
+			u32 *tmp;
+			crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
+					      PAGE_CACHE_SIZE - sizeof(u64));
+			btrfs_csum_final(crc, (char *)&crc);
+			crc++;
+			tmp = orig;
+			*tmp = crc;
+		}
+
+		kunmap(page);
 
 		bytes += PAGE_CACHE_SIZE;
 
@@ -779,11 +749,7 @@
 
 		addr = kmap(page);
 		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
-		*crc = ~(u32)0;
-		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
 		kunmap(page);
-		btrfs_csum_final(*crc, (char *)crc);
-		crc++;
 		bytes += PAGE_CACHE_SIZE;
 
 		list_del_init(&entry->list);
@@ -796,7 +762,7 @@
 				     i_size_read(inode) - 1, &cached_state,
 				     GFP_NOFS);
 		ret = 0;
-		goto out_free;
+		goto out;
 	}
 
 	/* Zero out the rest of the pages just to make sure */
@@ -811,20 +777,6 @@
 		index++;
 	}
 
-	/* Write the checksums and trans id to the first page */
-	{
-		void *addr;
-		u64 *gen;
-
-		page = pages[0];
-
-		addr = kmap(page);
-		memcpy(addr, checksums, sizeof(u32) * num_pages);
-		gen = addr + (sizeof(u32) * num_pages);
-		*gen = trans->transid;
-		kunmap(page);
-	}
-
 	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
 					    bytes, &cached_state);
 	btrfs_drop_pages(pages, num_pages);
@@ -833,7 +785,7 @@
 
 	if (ret) {
 		ret = 0;
-		goto out_free;
+		goto out;
 	}
 
 	BTRFS_I(inode)->generation = trans->transid;
@@ -850,7 +802,7 @@
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
 				 EXTENT_DIRTY | EXTENT_DELALLOC |
 				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
-		goto out_free;
+		goto out;
 	}
 	leaf = path->nodes[0];
 	if (ret > 0) {
@@ -866,7 +818,7 @@
 					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
 					 GFP_NOFS);
 			btrfs_release_path(path);
-			goto out_free;
+			goto out;
 		}
 	}
 	header = btrfs_item_ptr(leaf, path->slots[0],
@@ -879,11 +831,8 @@
 
 	ret = 1;
 
-out_free:
-	kfree(checksums);
+out:
 	kfree(pages);
-
-out_update:
 	if (ret != 1) {
 		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
 		BTRFS_I(inode)->generation = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e91b097..13e6255 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -750,15 +750,6 @@
 	return alloc_hint;
 }
 
-static inline bool is_free_space_inode(struct btrfs_root *root,
-				       struct inode *inode)
-{
-	if (root == root->fs_info->tree_root ||
-	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
-		return true;
-	return false;
-}
-
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code.  The basic idea is to
@@ -791,7 +782,7 @@
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	int ret = 0;
 
-	BUG_ON(is_free_space_inode(root, inode));
+	BUG_ON(btrfs_is_free_space_inode(root, inode));
 	trans = btrfs_join_transaction(root);
 	BUG_ON(IS_ERR(trans));
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1072,7 +1063,7 @@
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
-	nolock = is_free_space_inode(root, inode);
+	nolock = btrfs_is_free_space_inode(root, inode);
 
 	if (nolock)
 		trans = btrfs_join_transaction_nolock(root);
@@ -1298,7 +1289,9 @@
 	if (!(orig->state & EXTENT_DELALLOC))
 		return 0;
 
-	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+	spin_lock(&BTRFS_I(inode)->lock);
+	BTRFS_I(inode)->outstanding_extents++;
+	spin_unlock(&BTRFS_I(inode)->lock);
 	return 0;
 }
 
@@ -1316,7 +1309,9 @@
 	if (!(other->state & EXTENT_DELALLOC))
 		return 0;
 
-	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+	spin_lock(&BTRFS_I(inode)->lock);
+	BTRFS_I(inode)->outstanding_extents--;
+	spin_unlock(&BTRFS_I(inode)->lock);
 	return 0;
 }
 
@@ -1337,12 +1332,15 @@
 	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
 		u64 len = state->end + 1 - state->start;
-		bool do_list = !is_free_space_inode(root, inode);
+		bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-		if (*bits & EXTENT_FIRST_DELALLOC)
+		if (*bits & EXTENT_FIRST_DELALLOC) {
 			*bits &= ~EXTENT_FIRST_DELALLOC;
-		else
-			atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+		} else {
+			spin_lock(&BTRFS_I(inode)->lock);
+			BTRFS_I(inode)->outstanding_extents++;
+			spin_unlock(&BTRFS_I(inode)->lock);
+		}
 
 		spin_lock(&root->fs_info->delalloc_lock);
 		BTRFS_I(inode)->delalloc_bytes += len;
@@ -1370,12 +1368,15 @@
 	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
 		u64 len = state->end + 1 - state->start;
-		bool do_list = !is_free_space_inode(root, inode);
+		bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-		if (*bits & EXTENT_FIRST_DELALLOC)
+		if (*bits & EXTENT_FIRST_DELALLOC) {
 			*bits &= ~EXTENT_FIRST_DELALLOC;
-		else if (!(*bits & EXTENT_DO_ACCOUNTING))
-			atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+		} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+			spin_lock(&BTRFS_I(inode)->lock);
+			BTRFS_I(inode)->outstanding_extents--;
+			spin_unlock(&BTRFS_I(inode)->lock);
+		}
 
 		if (*bits & EXTENT_DO_ACCOUNTING)
 			btrfs_delalloc_release_metadata(inode, len);
@@ -1477,7 +1478,7 @@
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-	if (is_free_space_inode(root, inode))
+	if (btrfs_is_free_space_inode(root, inode))
 		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
 	else
 		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1726,7 +1727,7 @@
 		return 0;
 	BUG_ON(!ordered_extent);
 
-	nolock = is_free_space_inode(root, inode);
+	nolock = btrfs_is_free_space_inode(root, inode);
 
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list));
@@ -2531,13 +2532,6 @@
 
 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_inode_item);
-	if (!leaf->map_token)
-		map_private_extent_buffer(leaf, (unsigned long)inode_item,
-					  sizeof(struct btrfs_inode_item),
-					  &leaf->map_token, &leaf->kaddr,
-					  &leaf->map_start, &leaf->map_len,
-					  KM_USER1);
-
 	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
 	inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
 	inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2575,11 +2569,6 @@
 	if (!maybe_acls)
 		cache_no_acl(inode);
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	btrfs_free_path(path);
 
 	switch (inode->i_mode & S_IFMT) {
@@ -2624,13 +2613,6 @@
 			    struct btrfs_inode_item *item,
 			    struct inode *inode)
 {
-	if (!leaf->map_token)
-		map_private_extent_buffer(leaf, (unsigned long)item,
-					  sizeof(struct btrfs_inode_item),
-					  &leaf->map_token, &leaf->kaddr,
-					  &leaf->map_start, &leaf->map_len,
-					  KM_USER1);
-
 	btrfs_set_inode_uid(leaf, item, inode->i_uid);
 	btrfs_set_inode_gid(leaf, item, inode->i_gid);
 	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2659,11 +2641,6 @@
 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
 	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
 	btrfs_set_inode_block_group(leaf, item, 0);
-
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
 }
 
 /*
@@ -2684,7 +2661,7 @@
 	 * The data relocation inode should also be directly updated
 	 * without delay
 	 */
-	if (!is_free_space_inode(root, inode)
+	if (!btrfs_is_free_space_inode(root, inode)
 	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
 		ret = btrfs_delayed_update_inode(trans, root, inode);
 		if (!ret)
@@ -3398,7 +3375,7 @@
 
 	ret = -ENOMEM;
 again:
-	page = grab_cache_page(mapping, index);
+	page = find_or_create_page(mapping, index, GFP_NOFS);
 	if (!page) {
 		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
 		goto out;
@@ -3634,7 +3611,7 @@
 
 	truncate_inode_pages(&inode->i_data, 0);
 	if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
-			       is_free_space_inode(root, inode)))
+			       btrfs_is_free_space_inode(root, inode)))
 		goto no_delete;
 
 	if (is_bad_inode(inode)) {
@@ -4271,7 +4248,7 @@
 	if (BTRFS_I(inode)->dummy_inode)
 		return 0;
 
-	if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+	if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
 		nolock = true;
 
 	if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4467,7 +4444,7 @@
 	inode->i_generation = BTRFS_I(inode)->generation;
 	btrfs_set_inode_space_info(root, inode);
 
-	if (mode & S_IFDIR)
+	if (S_ISDIR(mode))
 		owner = 0;
 	else
 		owner = 1;
@@ -4512,7 +4489,7 @@
 
 	btrfs_inherit_iflags(inode, dir);
 
-	if ((mode & S_IFREG)) {
+	if (S_ISREG(mode)) {
 		if (btrfs_test_opt(root, NODATASUM))
 			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
 		if (btrfs_test_opt(root, NODATACOW) ||
@@ -6728,8 +6705,9 @@
 	ei->index_cnt = (u64)-1;
 	ei->last_unlink_trans = 0;
 
-	atomic_set(&ei->outstanding_extents, 0);
-	atomic_set(&ei->reserved_extents, 0);
+	spin_lock_init(&ei->lock);
+	ei->outstanding_extents = 0;
+	ei->reserved_extents = 0;
 
 	ei->ordered_data_close = 0;
 	ei->orphan_meta_reserved = 0;
@@ -6767,8 +6745,8 @@
 
 	WARN_ON(!list_empty(&inode->i_dentry));
 	WARN_ON(inode->i_data.nrpages);
-	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-	WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+	WARN_ON(BTRFS_I(inode)->outstanding_extents);
+	WARN_ON(BTRFS_I(inode)->reserved_extents);
 
 	/*
 	 * This can happen where we create an inode, but somebody else also
@@ -6823,7 +6801,7 @@
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
 	if (btrfs_root_refs(&root->root_item) == 0 &&
-	    !is_free_space_inode(root, inode))
+	    !btrfs_is_free_space_inode(root, inode))
 		return 1;
 	else
 		return generic_drop_inode(inode);
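
The accounting rework threaded through this file replaces the outstanding_extents/reserved_extents atomics with plain counters guarded by the new BTRFS_I(inode)->lock spinlock, so both values can be examined and adjusted together without cmpxchg loops. A minimal pthread analogue of the pattern, including a drop_outstanding_extent()-style release calculation (structure and names belong to the sketch, not the kernel):

#include <pthread.h>
#include <stdio.h>

/* two related counters that must stay mutually consistent */
struct inode_sketch {
	pthread_mutex_t lock;
	unsigned outstanding_extents;
	unsigned reserved_extents;
};

static void add_outstanding(struct inode_sketch *i)
{
	pthread_mutex_lock(&i->lock);
	i->outstanding_extents++;
	pthread_mutex_unlock(&i->lock);
}

/* drop an extent and compute how many reservations to release, atomically */
static unsigned drop_outstanding(struct inode_sketch *i)
{
	unsigned to_release = 0;

	pthread_mutex_lock(&i->lock);
	i->outstanding_extents--;
	if (i->reserved_extents > i->outstanding_extents) {
		to_release = i->reserved_extents - i->outstanding_extents;
		i->reserved_extents = i->outstanding_extents;
	}
	pthread_mutex_unlock(&i->lock);
	return to_release;
}

int main(void)
{
	struct inode_sketch ino = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	add_outstanding(&ino);
	ino.reserved_extents = 1;	/* pretend one reservation exists */
	printf("released %u extents\n", drop_outstanding(&ino));
	return 0;
}
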
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6225433..0b980af 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -859,8 +859,8 @@
 	/* step one, lock all the pages */
 	for (i = 0; i < num_pages; i++) {
 		struct page *page;
-		page = grab_cache_page(inode->i_mapping,
-					    start_index + i);
+		page = find_or_create_page(inode->i_mapping,
+					    start_index + i, GFP_NOFS);
 		if (!page)
 			break;
 
@@ -930,7 +930,9 @@
 			  GFP_NOFS);
 
 	if (i_done != num_pages) {
-		atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->outstanding_extents++;
+		spin_unlock(&BTRFS_I(inode)->lock);
 		btrfs_delalloc_release_space(inode,
 				     (num_pages - i_done) << PAGE_CACHE_SHIFT);
 	}
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 66fa43d..d77b67c 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,185 +24,197 @@
 #include "extent_io.h"
 #include "locking.h"
 
-static inline void spin_nested(struct extent_buffer *eb)
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
+
+/*
+ * if we currently have a spinning reader or writer lock
+ * (indicated by the rw flag), this will bump the count
+ * of blocking holders and drop the spinlock.
+ */
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-	spin_lock(&eb->lock);
+	if (rw == BTRFS_WRITE_LOCK) {
+		if (atomic_read(&eb->blocking_writers) == 0) {
+			WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+			atomic_dec(&eb->spinning_writers);
+			btrfs_assert_tree_locked(eb);
+			atomic_inc(&eb->blocking_writers);
+			write_unlock(&eb->lock);
+		}
+	} else if (rw == BTRFS_READ_LOCK) {
+		btrfs_assert_tree_read_locked(eb);
+		atomic_inc(&eb->blocking_readers);
+		WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+		atomic_dec(&eb->spinning_readers);
+		read_unlock(&eb->lock);
+	}
+	return;
 }
 
 /*
- * Setting a lock to blocking will drop the spinlock and set the
- * flag that forces other procs who want the lock to wait.  After
- * this you can safely schedule with the lock held.
+ * if we currently have a blocking lock, take the spinlock
+ * and drop our blocking count
  */
-void btrfs_set_lock_blocking(struct extent_buffer *eb)
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
-		set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
-		spin_unlock(&eb->lock);
+	if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
+		BUG_ON(atomic_read(&eb->blocking_writers) != 1);
+		write_lock(&eb->lock);
+		WARN_ON(atomic_read(&eb->spinning_writers));
+		atomic_inc(&eb->spinning_writers);
+		if (atomic_dec_and_test(&eb->blocking_writers))
+			wake_up(&eb->write_lock_wq);
+	} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
+		BUG_ON(atomic_read(&eb->blocking_readers) == 0);
+		read_lock(&eb->lock);
+		atomic_inc(&eb->spinning_readers);
+		if (atomic_dec_and_test(&eb->blocking_readers))
+			wake_up(&eb->read_lock_wq);
 	}
-	/* exit with the spin lock released and the bit set */
+	return;
 }
 
 /*
- * clearing the blocking flag will take the spinlock again.
- * After this you can't safely schedule
+ * take a spinning read lock.  This will wait for any blocking
+ * writers.
  */
-void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
-	if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
-		spin_nested(eb);
-		clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
-		smp_mb__after_clear_bit();
+again:
+	wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+	read_lock(&eb->lock);
+	if (atomic_read(&eb->blocking_writers)) {
+		read_unlock(&eb->lock);
+		wait_event(eb->write_lock_wq,
+			   atomic_read(&eb->blocking_writers) == 0);
+		goto again;
 	}
-	/* exit with the spin lock held */
+	atomic_inc(&eb->read_locks);
+	atomic_inc(&eb->spinning_readers);
 }
 
 /*
- * unfortunately, many of the places that currently set a lock to blocking
- * don't end up blocking for very long, and often they don't block
- * at all.  For a dbench 50 run, if we don't spin on the blocking bit
- * at all, the context switch rate can jump up to 400,000/sec or more.
- *
- * So, we're still stuck with this crummy spin on the blocking bit,
- * at least until the most common causes of the short blocks
- * can be dealt with.
+ * returns 1 if we get the read lock and 0 if we don't;
+ * this won't wait for blocking writers
  */
-static int btrfs_spin_on_block(struct extent_buffer *eb)
+int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 {
-	int i;
+	if (atomic_read(&eb->blocking_writers))
+		return 0;
 
-	for (i = 0; i < 512; i++) {
-		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-			return 1;
-		if (need_resched())
-			break;
-		cpu_relax();
+	read_lock(&eb->lock);
+	if (atomic_read(&eb->blocking_writers)) {
+		read_unlock(&eb->lock);
+		return 0;
 	}
-	return 0;
-}
-
-/*
- * This is somewhat different from trylock.  It will take the
- * spinlock but if it finds the lock is set to blocking, it will
- * return without the lock held.
- *
- * returns 1 if it was able to take the lock and zero otherwise
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
- */
-int btrfs_try_spin_lock(struct extent_buffer *eb)
-{
-	int i;
-
-	if (btrfs_spin_on_block(eb)) {
-		spin_nested(eb);
-		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-			return 1;
-		spin_unlock(&eb->lock);
-	}
-	/* spin for a bit on the BLOCKING flag */
-	for (i = 0; i < 2; i++) {
-		cpu_relax();
-		if (!btrfs_spin_on_block(eb))
-			break;
-
-		spin_nested(eb);
-		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-			return 1;
-		spin_unlock(&eb->lock);
-	}
-	return 0;
-}
-
-/*
- * the autoremove wake function will return 0 if it tried to wake up
- * a process that was already awake, which means that process won't
- * count as an exclusive wakeup.  The waitq code will continue waking
- * procs until it finds one that was actually sleeping.
- *
- * For btrfs, this isn't quite what we want.  We want a single proc
- * to be notified that the lock is ready for taking.  If that proc
- * already happen to be awake, great, it will loop around and try for
- * the lock.
- *
- * So, btrfs_wake_function always returns 1, even when the proc that we
- * tried to wake up was already awake.
- */
-static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
-			       int sync, void *key)
-{
-	autoremove_wake_function(wait, mode, sync, key);
+	atomic_inc(&eb->read_locks);
+	atomic_inc(&eb->spinning_readers);
 	return 1;
 }
 
 /*
- * returns with the extent buffer spinlocked.
- *
- * This will spin and/or wait as required to take the lock, and then
- * return with the spinlock held.
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * returns 1 if we get the write lock and 0 if we don't;
+ * this won't wait for blocking writers or readers
+ */
+int btrfs_try_tree_write_lock(struct extent_buffer *eb)
+{
+	if (atomic_read(&eb->blocking_writers) ||
+	    atomic_read(&eb->blocking_readers))
+		return 0;
+	write_lock(&eb->lock);
+	if (atomic_read(&eb->blocking_writers) ||
+	    atomic_read(&eb->blocking_readers)) {
+		write_unlock(&eb->lock);
+		return 0;
+	}
+	atomic_inc(&eb->write_locks);
+	atomic_inc(&eb->spinning_writers);
+	return 1;
+}
+
+/*
+ * drop a spinning read lock
+ */
+void btrfs_tree_read_unlock(struct extent_buffer *eb)
+{
+	btrfs_assert_tree_read_locked(eb);
+	WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+	atomic_dec(&eb->spinning_readers);
+	atomic_dec(&eb->read_locks);
+	read_unlock(&eb->lock);
+}
+
+/*
+ * drop a blocking read lock
+ */
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
+{
+	btrfs_assert_tree_read_locked(eb);
+	WARN_ON(atomic_read(&eb->blocking_readers) == 0);
+	if (atomic_dec_and_test(&eb->blocking_readers))
+		wake_up(&eb->read_lock_wq);
+	atomic_dec(&eb->read_locks);
+}
+
+/*
+ * take a spinning write lock.  This will wait for both
+ * blocking readers and writers.
  */
 int btrfs_tree_lock(struct extent_buffer *eb)
 {
-	DEFINE_WAIT(wait);
-	wait.func = btrfs_wake_function;
-
-	if (!btrfs_spin_on_block(eb))
-		goto sleep;
-
-	while(1) {
-		spin_nested(eb);
-
-		/* nobody is blocking, exit with the spinlock held */
-		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-			return 0;
-
-		/*
-		 * we have the spinlock, but the real owner is blocking.
-		 * wait for them
-		 */
-		spin_unlock(&eb->lock);
-
-		/*
-		 * spin for a bit, and if the blocking flag goes away,
-		 * loop around
-		 */
-		cpu_relax();
-		if (btrfs_spin_on_block(eb))
-			continue;
-sleep:
-		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-			schedule();
-
-		finish_wait(&eb->lock_wq, &wait);
+again:
+	wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
+	wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+	write_lock(&eb->lock);
+	if (atomic_read(&eb->blocking_readers)) {
+		write_unlock(&eb->lock);
+		wait_event(eb->read_lock_wq,
+			   atomic_read(&eb->blocking_readers) == 0);
+		goto again;
 	}
+	if (atomic_read(&eb->blocking_writers)) {
+		write_unlock(&eb->lock);
+		wait_event(eb->write_lock_wq,
+			   atomic_read(&eb->blocking_writers) == 0);
+		goto again;
+	}
+	WARN_ON(atomic_read(&eb->spinning_writers));
+	atomic_inc(&eb->spinning_writers);
+	atomic_inc(&eb->write_locks);
 	return 0;
 }
 
+/*
+ * drop a spinning or a blocking write lock.
+ */
 int btrfs_tree_unlock(struct extent_buffer *eb)
 {
-	/*
-	 * if we were a blocking owner, we don't have the spinlock held
-	 * just clear the bit and look for waiters
-	 */
-	if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-		smp_mb__after_clear_bit();
-	else
-		spin_unlock(&eb->lock);
+	int blockers = atomic_read(&eb->blocking_writers);
 
-	if (waitqueue_active(&eb->lock_wq))
-		wake_up(&eb->lock_wq);
+	BUG_ON(blockers > 1);
+
+	btrfs_assert_tree_locked(eb);
+	atomic_dec(&eb->write_locks);
+
+	if (blockers) {
+		WARN_ON(atomic_read(&eb->spinning_writers));
+		atomic_dec(&eb->blocking_writers);
+		smp_wmb();
+		wake_up(&eb->write_lock_wq);
+	} else {
+		WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+		atomic_dec(&eb->spinning_writers);
+		write_unlock(&eb->lock);
+	}
 	return 0;
 }
 
 void btrfs_assert_tree_locked(struct extent_buffer *eb)
 {
-	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-		assert_spin_locked(&eb->lock);
+	BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+	BUG_ON(!atomic_read(&eb->read_locks));
 }
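
To summarize the new scheme: each extent buffer now carries an rwlock plus counters, and a holder that needs to sleep converts its spinning lock into a blocking one by bumping blocking_writers (or blocking_readers) and releasing the rwlock, while prospective lockers wait on the wait queues until the blocking counts drain back to zero. A loose userspace analogue of the write-lock life cycle, built on pthreads (a sketch under simplifying assumptions, not kernel code):

#include <pthread.h>
#include <stdio.h>

/* userspace stand-in for the extent buffer lock state */
struct eb_sketch {
	pthread_rwlock_t lock;
	pthread_mutex_t wq_lock;	/* guards the counter below */
	pthread_cond_t write_wq;	/* waiters for blocking writers */
	int blocking_writers;
};

/* take a "spinning" write lock, waiting out any blocking writer */
static void tree_lock(struct eb_sketch *eb)
{
again:
	pthread_mutex_lock(&eb->wq_lock);
	while (eb->blocking_writers)
		pthread_cond_wait(&eb->write_wq, &eb->wq_lock);
	pthread_mutex_unlock(&eb->wq_lock);

	pthread_rwlock_wrlock(&eb->lock);
	pthread_mutex_lock(&eb->wq_lock);
	if (eb->blocking_writers) {
		/* a writer went blocking between our checks; retry */
		pthread_mutex_unlock(&eb->wq_lock);
		pthread_rwlock_unlock(&eb->lock);
		goto again;
	}
	pthread_mutex_unlock(&eb->wq_lock);
}

/* convert our held write lock to blocking so we may sleep */
static void set_lock_blocking(struct eb_sketch *eb)
{
	pthread_mutex_lock(&eb->wq_lock);
	eb->blocking_writers++;
	pthread_mutex_unlock(&eb->wq_lock);
	/* drop the rwlock; the count above still excludes others */
	pthread_rwlock_unlock(&eb->lock);
}

/* release a blocking write lock and wake anyone waiting on it */
static void tree_unlock_blocking(struct eb_sketch *eb)
{
	pthread_mutex_lock(&eb->wq_lock);
	eb->blocking_writers--;
	pthread_cond_broadcast(&eb->write_wq);
	pthread_mutex_unlock(&eb->wq_lock);
}

int main(void)
{
	struct eb_sketch eb = { .wq_lock = PTHREAD_MUTEX_INITIALIZER,
				.write_wq = PTHREAD_COND_INITIALIZER };
	pthread_rwlock_init(&eb.lock, NULL);

	tree_lock(&eb);			/* spinning write lock */
	set_lock_blocking(&eb);		/* about to do blocking work */
	tree_unlock_blocking(&eb);	/* done; wake waiters */
	puts("write lock cycle complete");
	return 0;
}

The real code also tracks read-side counts (blocking_readers, spinning_readers) and a second wait queue; this sketch keeps only the write path.
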
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 5c33a56..17247dd 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,11 +19,43 @@
 #ifndef __BTRFS_LOCKING_
 #define __BTRFS_LOCKING_
 
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
 int btrfs_tree_lock(struct extent_buffer *eb);
 int btrfs_tree_unlock(struct extent_buffer *eb);
 int btrfs_try_spin_lock(struct extent_buffer *eb);
 
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+	if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+		btrfs_tree_unlock(eb);
+	else if (rw == BTRFS_READ_LOCK_BLOCKING)
+		btrfs_tree_read_unlock_blocking(eb);
+	else if (rw == BTRFS_READ_LOCK)
+		btrfs_tree_read_unlock(eb);
+	else
+		BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+	btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+	btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
 #endif
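
Callers record which lock flavor they hold in path->locks[level] using these constants, and btrfs_tree_unlock_rw() dispatches on the recorded value, which is what the extent-tree.c hunks above rely on when they store BTRFS_WRITE_LOCK_BLOCKING. A toy model of that dispatch, with stub unlock functions standing in for the real helpers:

#include <stdio.h>

/* the lock-state values a path slot may record (mirrors locking.h) */
#define WRITE_LOCK		1
#define READ_LOCK		2
#define WRITE_LOCK_BLOCKING	3
#define READ_LOCK_BLOCKING	4

/* stand-ins for the real unlock helpers */
static void write_unlock_stub(void)         { puts("write unlock"); }
static void read_unlock_stub(void)          { puts("read unlock"); }
static void read_unlock_blocking_stub(void) { puts("blocking read unlock"); }

/* dispatch on the recorded lock flavor, as btrfs_tree_unlock_rw does */
static void unlock_rw(int rw)
{
	if (rw == WRITE_LOCK || rw == WRITE_LOCK_BLOCKING)
		write_unlock_stub();
	else if (rw == READ_LOCK_BLOCKING)
		read_unlock_blocking_stub();
	else if (rw == READ_LOCK)
		read_unlock_stub();
}

int main(void)
{
	int locks[2] = { WRITE_LOCK_BLOCKING, READ_LOCK };

	/* walk a pretend path, releasing each level with its own flavor */
	for (int level = 0; level < 2; level++)
		unlock_rw(locks[level]);
	return 0;
}
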
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5e0a3dc..59bb176 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2955,7 +2955,8 @@
 			page_cache_sync_readahead(inode->i_mapping,
 						  ra, NULL, index,
 						  last_index + 1 - index);
-			page = grab_cache_page(inode->i_mapping, index);
+			page = find_or_create_page(inode->i_mapping, index,
+						   GFP_NOFS);
 			if (!page) {
 				btrfs_delalloc_release_metadata(inode,
 							PAGE_CACHE_SIZE);
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c0f7eca..bc1f6ad 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,36 +50,22 @@
 	unsigned long part_offset = (unsigned long)s;			\
 	unsigned long offset = part_offset + offsetof(type, member);	\
 	type *p;							\
-	/* ugly, but we want the fast path here */			\
-	if (eb->map_token && offset >= eb->map_start &&			\
-	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
-	    eb->map_len) {						\
-		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
-		return le##bits##_to_cpu(p->member);			\
-	}								\
-	{								\
-		int err;						\
-		char *map_token;					\
-		char *kaddr;						\
-		int unmap_on_exit = (eb->map_token == NULL);		\
-		unsigned long map_start;				\
-		unsigned long map_len;					\
-		u##bits res;						\
-		err = map_extent_buffer(eb, offset,			\
-				sizeof(((type *)0)->member),		\
-				&map_token, &kaddr,			\
-				&map_start, &map_len, KM_USER1);	\
-		if (err) {						\
-			__le##bits leres;				\
-			read_eb_member(eb, s, type, member, &leres);	\
-			return le##bits##_to_cpu(leres);		\
-		}							\
-		p = (type *)(kaddr + part_offset - map_start);		\
-		res = le##bits##_to_cpu(p->member);			\
-		if (unmap_on_exit)					\
-			unmap_extent_buffer(eb, map_token, KM_USER1);	\
-		return res;						\
-	}								\
+	int err;						\
+	char *kaddr;						\
+	unsigned long map_start;				\
+	unsigned long map_len;					\
+	u##bits res;						\
+	err = map_private_extent_buffer(eb, offset,		\
+			sizeof(((type *)0)->member),		\
+			&kaddr, &map_start, &map_len);		\
+	if (err) {						\
+		__le##bits leres;				\
+		read_eb_member(eb, s, type, member, &leres);	\
+		return le##bits##_to_cpu(leres);		\
+	}							\
+	p = (type *)(kaddr + part_offset - map_start);		\
+	res = le##bits##_to_cpu(p->member);			\
+	return res;						\
 }									\
 void btrfs_set_##name(struct extent_buffer *eb,				\
 				    type *s, u##bits val)		\
@@ -87,36 +73,21 @@
 	unsigned long part_offset = (unsigned long)s;			\
 	unsigned long offset = part_offset + offsetof(type, member);	\
 	type *p;							\
-	/* ugly, but we want the fast path here */			\
-	if (eb->map_token && offset >= eb->map_start &&			\
-	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
-	    eb->map_len) {						\
-		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
-		p->member = cpu_to_le##bits(val);			\
-		return;							\
-	}								\
-	{								\
-		int err;						\
-		char *map_token;					\
-		char *kaddr;						\
-		int unmap_on_exit = (eb->map_token == NULL);		\
-		unsigned long map_start;				\
-		unsigned long map_len;					\
-		err = map_extent_buffer(eb, offset,			\
-				sizeof(((type *)0)->member),		\
-				&map_token, &kaddr,			\
-				&map_start, &map_len, KM_USER1);	\
-		if (err) {						\
-			__le##bits val2;				\
-			val2 = cpu_to_le##bits(val);			\
-			write_eb_member(eb, s, type, member, &val2);	\
-			return;						\
-		}							\
-		p = (type *)(kaddr + part_offset - map_start);		\
-		p->member = cpu_to_le##bits(val);			\
-		if (unmap_on_exit)					\
-			unmap_extent_buffer(eb, map_token, KM_USER1);	\
-	}								\
+	int err;						\
+	char *kaddr;						\
+	unsigned long map_start;				\
+	unsigned long map_len;					\
+	err = map_private_extent_buffer(eb, offset,		\
+			sizeof(((type *)0)->member),		\
+			&kaddr, &map_start, &map_len);		\
+	if (err) {						\
+		__le##bits val2;				\
+		val2 = cpu_to_le##bits(val);			\
+		write_eb_member(eb, s, type, member, &val2);	\
+		return;						\
+	}							\
+	p = (type *)(kaddr + part_offset - map_start);		\
+	p->member = cpu_to_le##bits(val);			\
 }
 
 #include "ctree.h"
@@ -125,15 +96,6 @@
 		    struct btrfs_disk_key *disk_key, int nr)
 {
 	unsigned long ptr = btrfs_node_key_ptr_offset(nr);
-	if (eb->map_token && ptr >= eb->map_start &&
-	    ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
-		memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
-			sizeof(*disk_key));
-		return;
-	} else if (eb->map_token) {
-		unmap_extent_buffer(eb, eb->map_token, KM_USER1);
-		eb->map_token = NULL;
-	}
 	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
 		       struct btrfs_key_ptr, key, disk_key);
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 51dcec8..eb55863 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -260,7 +260,7 @@
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
-	int retries = 0;
+	u64 num_bytes = 0;
 	int ret;
 
 	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +274,19 @@
 		h->block_rsv = NULL;
 		goto got_it;
 	}
+
+	/*
+	 * Do the reservation before we join the transaction so we can do all
+	 * the appropriate flushing if need be.
+	 */
+	if (num_items > 0 && root != root->fs_info->chunk_root) {
+		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+		ret = btrfs_block_rsv_add(NULL, root,
+					  &root->fs_info->trans_block_rsv,
+					  num_bytes);
+		if (ret)
+			return ERR_PTR(ret);
+	}
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 	if (!h)
@@ -310,24 +323,9 @@
 		goto again;
 	}
 
-	if (num_items > 0) {
-		ret = btrfs_trans_reserve_metadata(h, root, num_items);
-		if (ret == -EAGAIN && !retries) {
-			retries++;
-			btrfs_commit_transaction(h, root);
-			goto again;
-		} else if (ret == -EAGAIN) {
-			/*
-			 * We have already retried and got EAGAIN, so really we
-			 * don't have space, so set ret to -ENOSPC.
-			 */
-			ret = -ENOSPC;
-		}
-
-		if (ret < 0) {
-			btrfs_end_transaction(h, root);
-			return ERR_PTR(ret);
-		}
+	if (num_bytes) {
+		h->block_rsv = &root->fs_info->trans_block_rsv;
+		h->bytes_reserved = num_bytes;
 	}
 
 got_it:
@@ -499,10 +497,17 @@
 	}
 
 	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
-		if (throttle)
+		if (throttle) {
+			/*
+			 * We may race with somebody else here and end up
+			 * having to call end_transaction on ourselves again,
+			 * so bump our use_count.
+			 */
+			trans->use_count++;
 			return btrfs_commit_transaction(trans, root);
-		else
+		} else {
 			wake_up_process(info->transaction_kthread);
+		}
 	}
 
 	WARN_ON(cur_trans != info->running_transaction);
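
The transaction change reorders reservation and join: the metadata reservation, which may flush delalloc to make space, now happens before the handle joins the running transaction, so the flush never runs while this task is holding the transaction open. A hedged outline of that ordering (sizes and names are invented for the sketch):

#include <stdio.h>

/* pretend reservation that may need flushing to succeed */
static int block_rsv_add(unsigned long long *rsv, unsigned long long bytes,
			 unsigned long long available)
{
	if (bytes > available)
		return -28;	/* -ENOSPC */
	*rsv += bytes;
	return 0;
}

static int start_transaction(unsigned num_items,
			     unsigned long long item_size,
			     unsigned long long available)
{
	unsigned long long rsv = 0;
	int ret;

	/* step 1: reserve (and potentially flush) before joining */
	if (num_items) {
		ret = block_rsv_add(&rsv, num_items * item_size, available);
		if (ret)
			return ret;	/* never joined, nothing to unwind */
	}
	/* step 2: only now join the running transaction */
	printf("joined with %llu bytes reserved\n", rsv);
	return 0;
}

int main(void)
{
	printf("ok: %d\n", start_transaction(2, 4096, 1 << 20));
	printf("enospc: %d\n", start_transaction(2, 4096, 1024));
	return 0;
}
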
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4ce8a9f..ac278dd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1730,8 +1730,8 @@
 				btrfs_read_buffer(next, ptr_gen);
 
 				btrfs_tree_lock(next);
-				clean_tree_block(trans, root, next);
 				btrfs_set_lock_blocking(next);
+				clean_tree_block(trans, root, next);
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
@@ -1796,8 +1796,8 @@
 				next = path->nodes[*level];
 
 				btrfs_tree_lock(next);
-				clean_tree_block(trans, root, next);
 				btrfs_set_lock_blocking(next);
+				clean_tree_block(trans, root, next);
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
@@ -1864,8 +1864,8 @@
 			next = path->nodes[orig_level];
 
 			btrfs_tree_lock(next);
-			clean_tree_block(trans, log, next);
 			btrfs_set_lock_blocking(next);
+			clean_tree_block(trans, log, next);
 			btrfs_wait_tree_block_writeback(next);
 			btrfs_tree_unlock(next);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19450bc..b89e372 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3595,7 +3595,7 @@
 	if (!sb)
 		return -ENOMEM;
 	btrfs_set_buffer_uptodate(sb);
-	btrfs_set_buffer_lockdep_class(sb, 0);
+	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
 	array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 5366fe4..d733b9c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -102,43 +102,57 @@
 	if (!path)
 		return -ENOMEM;
 
-	/* first lets see if we already have this xattr */
-	di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-				strlen(name), -1);
-	if (IS_ERR(di)) {
-		ret = PTR_ERR(di);
-		goto out;
-	}
-
-	/* ok we already have this xattr, lets remove it */
-	if (di) {
-		/* if we want create only exit */
-		if (flags & XATTR_CREATE) {
-			ret = -EEXIST;
+	if (flags & XATTR_REPLACE) {
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+					name_len, -1);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
 			goto out;
-		}
-
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		BUG_ON(ret);
-		btrfs_release_path(path);
-
-		/* if we don't have a value then we are removing the xattr */
-		if (!value)
-			goto out;
-	} else {
-		btrfs_release_path(path);
-
-		if (flags & XATTR_REPLACE) {
-			/* we couldn't find the attr to replace */
+		} else if (!di) {
 			ret = -ENODATA;
 			goto out;
 		}
+		ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		if (ret)
+			goto out;
+		btrfs_release_path(path);
 	}
 
-	/* ok we have to create a completely new xattr */
+again:
 	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
 				      name, name_len, value, size);
-	BUG_ON(ret);
+	if (ret == -EEXIST) {
+		if (flags & XATTR_CREATE)
+			goto out;
+		/*
+		 * We can't use the path we already have since we won't have the
+		 * proper locking for a delete, so release the path and
+		 * re-lookup the item so we can delete it.
+		 */
+		btrfs_release_path(path);
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		} else if (!di) {
+			/* Shouldn't happen but just in case... */
+			btrfs_release_path(path);
+			goto again;
+		}
+
+		ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		if (ret)
+			goto out;
+
+		/*
+		 * We have a value to set, so go back and try to insert it now.
+		 */
+		if (value) {
+			btrfs_release_path(path);
+			goto again;
+		}
+	}
 out:
 	btrfs_free_path(path);
 	return ret;
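
The rewrite drops both BUG_ON()s in favour of an optimistic insert: try btrfs_insert_xattr_item() first, and only on -EEXIST look the old entry up, delete it, and loop back. Here is a hedged userspace sketch of the same loop over a one-slot store; store_insert() and store_delete() are stand-ins for the btrfs item helpers:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static char slot_name[32];	/* one-slot "tree" for the sketch */
static char slot_value[32];

static int store_insert(const char *name, const char *value)
{
	if (slot_name[0] && strcmp(slot_name, name) == 0)
		return -EEXIST;	/* already present, like the xattr item */
	snprintf(slot_name, sizeof(slot_name), "%s", name);
	snprintf(slot_value, sizeof(slot_value), "%s", value);
	return 0;
}

static void store_delete(const char *name)
{
	if (strcmp(slot_name, name) == 0)
		slot_name[0] = '\0';
}

static int set_entry(const char *name, const char *value)
{
	int ret;

again:
	ret = store_insert(name, value);
	if (ret == -EEXIST) {
		/* Drop the old entry and retry the insert, mirroring the
		 * release-path / re-lookup / delete / goto-again flow above. */
		store_delete(name);
		goto again;
	}
	return ret;
}

int main(void)
{
	set_entry("user.test", "one");
	set_entry("user.test", "two");	/* exercises the -EEXIST path */
	printf("%s = %s\n", slot_name, slot_value);
	return 0;
}
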
diff --git a/fs/dcache.c b/fs/dcache.c
index be18598..b05aac3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2138,8 +2138,9 @@
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.  Caller hold
- * rename_lock.
+ * dcache entries should not be moved in this way. Caller must hold
+ * rename_lock, the i_mutex of the source and target directories,
+ * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
  */
 static void __d_move(struct dentry * dentry, struct dentry * target)
 {
@@ -2202,7 +2203,8 @@
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way. See the locking
+ * requirements for __d_move.
  */
 void d_move(struct dentry *dentry, struct dentry *target)
 {
@@ -2320,7 +2322,8 @@
  * @inode: inode to bind to the dentry, to which aliases may be attached
  *
  * Introduces a dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one
+ * root directory alias in its place if there is one. Caller must hold the
+ * i_mutex of the parent directory.
  */
 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 43c7c43..b36c557 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -29,6 +29,7 @@
 #define ECRYPTFS_KERNEL_H
 
 #include <keys/user-type.h>
+#include <keys/encrypted-type.h>
 #include <linux/fs.h>
 #include <linux/fs_stack.h>
 #include <linux/namei.h>
@@ -36,125 +37,18 @@
 #include <linux/hash.h>
 #include <linux/nsproxy.h>
 #include <linux/backing-dev.h>
+#include <linux/ecryptfs.h>
 
-/* Version verification for shared data structures w/ userspace */
-#define ECRYPTFS_VERSION_MAJOR 0x00
-#define ECRYPTFS_VERSION_MINOR 0x04
-#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
-/* These flags indicate which features are supported by the kernel
- * module; userspace tools such as the mount helper read
- * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
- * how to behave. */
-#define ECRYPTFS_VERSIONING_PASSPHRASE            0x00000001
-#define ECRYPTFS_VERSIONING_PUBKEY                0x00000002
-#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
-#define ECRYPTFS_VERSIONING_POLICY                0x00000008
-#define ECRYPTFS_VERSIONING_XATTR                 0x00000010
-#define ECRYPTFS_VERSIONING_MULTKEY               0x00000020
-#define ECRYPTFS_VERSIONING_DEVMISC               0x00000040
-#define ECRYPTFS_VERSIONING_HMAC                  0x00000080
-#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION   0x00000100
-#define ECRYPTFS_VERSIONING_GCM                   0x00000200
-#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
-				  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
-				  | ECRYPTFS_VERSIONING_PUBKEY \
-				  | ECRYPTFS_VERSIONING_XATTR \
-				  | ECRYPTFS_VERSIONING_MULTKEY \
-				  | ECRYPTFS_VERSIONING_DEVMISC \
-				  | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
-#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
-#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
-#define ECRYPTFS_SALT_SIZE 8
-#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
-/* The original signature size is only for what is stored on disk; all
- * in-memory representations are expanded hex, so it better adapted to
- * be passed around or referenced on the command line */
-#define ECRYPTFS_SIG_SIZE 8
-#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
-#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
-#define ECRYPTFS_MAX_KEY_BYTES 64
-#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
 #define ECRYPTFS_DEFAULT_IV_BYTES 16
-#define ECRYPTFS_FILE_VERSION 0x03
 #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
 #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
 #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
 #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
 #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
-#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
 #define ECRYPTFS_DEFAULT_NUM_USERS 4
 #define ECRYPTFS_MAX_NUM_USERS 32768
 #define ECRYPTFS_XATTR_NAME "user.ecryptfs"
 
-#define RFC2440_CIPHER_DES3_EDE 0x02
-#define RFC2440_CIPHER_CAST_5 0x03
-#define RFC2440_CIPHER_BLOWFISH 0x04
-#define RFC2440_CIPHER_AES_128 0x07
-#define RFC2440_CIPHER_AES_192 0x08
-#define RFC2440_CIPHER_AES_256 0x09
-#define RFC2440_CIPHER_TWOFISH 0x0a
-#define RFC2440_CIPHER_CAST_6 0x0b
-
-#define RFC2440_CIPHER_RSA 0x01
-
-/**
- * For convenience, we may need to pass around the encrypted session
- * key between kernel and userspace because the authentication token
- * may not be extractable.  For example, the TPM may not release the
- * private key, instead requiring the encrypted data and returning the
- * decrypted data.
- */
-struct ecryptfs_session_key {
-#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
-#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
-#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
-#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
-	u32 flags;
-	u32 encrypted_key_size;
-	u32 decrypted_key_size;
-	u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
-	u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
-};
-
-struct ecryptfs_password {
-	u32 password_bytes;
-	s32 hash_algo;
-	u32 hash_iterations;
-	u32 session_key_encryption_key_bytes;
-#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
-#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
-	u32 flags;
-	/* Iterated-hash concatenation of salt and passphrase */
-	u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
-	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
-	/* Always in expanded hex */
-	u8 salt[ECRYPTFS_SALT_SIZE];
-};
-
-enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
-
-struct ecryptfs_private_key {
-	u32 key_size;
-	u32 data_len;
-	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
-	char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
-	u8 data[];
-};
-
-/* May be a password or a private key */
-struct ecryptfs_auth_tok {
-	u16 version; /* 8-bit major and 8-bit minor */
-	u16 token_type;
-#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
-	u32 flags;
-	struct ecryptfs_session_key session_key;
-	u8 reserved[32];
-	union {
-		struct ecryptfs_password password;
-		struct ecryptfs_private_key private_key;
-	} token;
-} __attribute__ ((packed));
-
 void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
 extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
 extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
@@ -185,11 +79,47 @@
 	} param;
 };
 
+#if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE)
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_encrypted_key_payload_data(struct key *key)
+{
+	if (key->type == &key_type_encrypted)
+		return (struct ecryptfs_auth_tok *)
+			(&((struct encrypted_key_payload *)key->payload.data)->payload_data);
+	else
+		return NULL;
+}
+
+static inline struct key *ecryptfs_get_encrypted_key(char *sig)
+{
+	return request_key(&key_type_encrypted, sig, NULL);
+}
+
+#else
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_encrypted_key_payload_data(struct key *key)
+{
+	return NULL;
+}
+
+static inline struct key *ecryptfs_get_encrypted_key(char *sig)
+{
+	return ERR_PTR(-ENOKEY);
+}
+
+#endif /* CONFIG_ENCRYPTED_KEYS */
+
 static inline struct ecryptfs_auth_tok *
 ecryptfs_get_key_payload_data(struct key *key)
 {
-	return (struct ecryptfs_auth_tok *)
-		(((struct user_key_payload*)key->payload.data)->data);
+	struct ecryptfs_auth_tok *auth_tok;
+
+	auth_tok = ecryptfs_get_encrypted_key_payload_data(key);
+	if (!auth_tok)
+		return (struct ecryptfs_auth_tok *)
+			(((struct user_key_payload *)key->payload.data)->data);
+	else
+		return auth_tok;
 }
 
 #define ECRYPTFS_MAX_KEYSET_SIZE 1024
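
With the helpers above, ecryptfs_get_key_payload_data() can call the encrypted-key path unconditionally: when CONFIG_ENCRYPTED_KEYS is off, the inline stub simply returns NULL and the user-key path is taken. A small sketch of that compile-time-stub-plus-fallback pattern; HAVE_FEATURE is a made-up macro standing in for the config test:

#include <stdio.h>

/* Flip this to mimic CONFIG_ENCRYPTED_KEYS[_MODULE]. */
#define HAVE_FEATURE 0

#if HAVE_FEATURE
static const char *feature_lookup(const char *key)
{
	return key ? "payload-from-feature" : NULL;
}
#else
/* Inert stub: callers can call it unconditionally and just get NULL. */
static const char *feature_lookup(const char *key)
{
	(void)key;
	return NULL;
}
#endif

static const char *get_payload(const char *key)
{
	const char *p = feature_lookup(key);

	/* Fall back to the default path when the feature is absent,
	 * mirroring ecryptfs_get_key_payload_data() above. */
	return p ? p : "payload-from-default";
}

int main(void)
{
	printf("%s\n", get_payload("sig"));
	return 0;
}
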
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index fa8049e..c472533 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1635,11 +1635,14 @@
 
 	(*auth_tok_key) = request_key(&key_type_user, sig, NULL);
 	if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
-		printk(KERN_ERR "Could not find key with description: [%s]\n",
-		       sig);
-		rc = process_request_key_err(PTR_ERR(*auth_tok_key));
-		(*auth_tok_key) = NULL;
-		goto out;
+		(*auth_tok_key) = ecryptfs_get_encrypted_key(sig);
+		if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
+			printk(KERN_ERR "Could not find key with description: [%s]\n",
+			      sig);
+			rc = process_request_key_err(PTR_ERR(*auth_tok_key));
+			(*auth_tok_key) = NULL;
+			goto out;
+		}
 	}
 	down_write(&(*auth_tok_key)->sem);
 	rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok);
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 5c0a6a4..503bfb0 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -61,7 +61,6 @@
 #else
 #include <linux/sched.h>
 #define ext2_get_acl	NULL
-#define ext2_get_acl	NULL
 #define ext2_set_acl	NULL
 
 static inline int
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 516516e..3bc073a 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1018,13 +1018,13 @@
 		fsname++;
 	if (lm->lm_mount == NULL) {
 		fs_info(sdp, "Now mounting FS...\n");
-		complete(&sdp->sd_locking_init);
+		complete_all(&sdp->sd_locking_init);
 		return 0;
 	}
 	ret = lm->lm_mount(sdp, fsname);
 	if (ret == 0)
 		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
-	complete(&sdp->sd_locking_init);
+	complete_all(&sdp->sd_locking_init);
 	return ret;
 }
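
complete() releases a single waiter on the completion, while complete_all() releases every current and future one; since several mounters may be blocked on sd_locking_init, only the latter is correct here. A userspace analogue using a pthread condition variable (build with -pthread), where the broadcast plays the role of complete_all():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done;	/* the completion's "done" state */

static void *waiter(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!done)			/* wait_for_completion() analogue */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	printf("waiter %ld released\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	long i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, waiter, (void *)i);

	pthread_mutex_lock(&lock);
	done = 1;
	/* complete_all() analogue: wake every waiter.  A single
	 * pthread_cond_signal() here (the complete() analogue) would
	 * wake only one thread and leave the others blocked. */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}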
 
diff --git a/fs/inode.c b/fs/inode.c
index a48fa53..d0c72ff 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -361,9 +361,11 @@
 
 static inline void inode_sb_list_del(struct inode *inode)
 {
-	spin_lock(&inode_sb_list_lock);
-	list_del_init(&inode->i_sb_list);
-	spin_unlock(&inode_sb_list_lock);
+	if (!list_empty(&inode->i_sb_list)) {
+		spin_lock(&inode_sb_list_lock);
+		list_del_init(&inode->i_sb_list);
+		spin_unlock(&inode_sb_list_lock);
+	}
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -796,6 +798,29 @@
 EXPORT_SYMBOL(get_next_ino);
 
 /**
+ *	new_inode_pseudo 	- obtain an inode
+ *	@sb: superblock
+ *
+ *	Allocates a new inode for the given superblock.
+ *	The inode won't be chained on the superblock's s_inodes list,
+ *	which means:
+ *	- the fs can't be unmounted
+ *	- quotas, fsnotify and writeback can't see it
+ */
+struct inode *new_inode_pseudo(struct super_block *sb)
+{
+	struct inode *inode = alloc_inode(sb);
+
+	if (inode) {
+		spin_lock(&inode->i_lock);
+		inode->i_state = 0;
+		spin_unlock(&inode->i_lock);
+		INIT_LIST_HEAD(&inode->i_sb_list);
+	}
+	return inode;
+}
+
+/**
  *	new_inode 	- obtain an inode
  *	@sb: superblock
  *
@@ -813,13 +838,9 @@
 
 	spin_lock_prefetch(&inode_sb_list_lock);
 
-	inode = alloc_inode(sb);
-	if (inode) {
-		spin_lock(&inode->i_lock);
-		inode->i_state = 0;
-		spin_unlock(&inode->i_lock);
+	inode = new_inode_pseudo(sb);
+	if (inode)
 		inode_sb_list_add(inode);
-	}
 	return inode;
 }
 EXPORT_SYMBOL(new_inode);
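
new_inode() is now just new_inode_pseudo() plus the s_inodes insertion, so pseudo inodes (pipes, anon inodes and the like) can skip the global inode_sb_list_lock entirely. A toy sketch of that split, with plain structs standing in for struct inode and the superblock list; none of this is the real VFS API:

#include <stdio.h>
#include <stdlib.h>

struct inode {
	int i_state;
	struct inode *i_sb_next;	/* toy stand-in for i_sb_list */
};

static struct inode *sb_inodes;		/* toy stand-in for sb->s_inodes */

/* new_inode_pseudo() analogue: allocate and initialise, but do not
 * chain onto the superblock list, so unmount/writeback never see it. */
static struct inode *new_inode_pseudo(void)
{
	struct inode *inode = calloc(1, sizeof(*inode));

	if (inode)
		inode->i_state = 0;
	return inode;
}

/* new_inode() analogue: the pseudo variant plus the list insertion. */
static struct inode *new_inode(void)
{
	struct inode *inode = new_inode_pseudo();

	if (inode) {
		inode->i_sb_next = sb_inodes;
		sb_inodes = inode;
	}
	return inode;
}

int main(void)
{
	struct inode *a = new_inode();
	struct inode *b = new_inode_pseudo();

	printf("listed: %p, unlisted: %p\n", (void *)a, (void *)b);
	free(a);
	free(b);
	return 0;
}
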
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index eeead33..b81b35d 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -80,7 +80,7 @@
 				  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
 	if (ret) {
 		jffs2_free_raw_inode(ri);
-		if (S_ISLNK(inode->i_mode & S_IFMT))
+		if (S_ISLNK(inode->i_mode))
 			 kfree(mdata);
 		return ret;
 	}
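
The jffs2 fix works because S_ISLNK() already masks its argument with S_IFMT, so pre-masking the mode was redundant noise. A quick standalone demonstration:

#include <assert.h>
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* A symlink mode with permission bits set. */
	mode_t mode = S_IFLNK | 0777;

	/* S_ISLNK(m) expands to ((m & S_IFMT) == S_IFLNK), so masking
	 * first changes nothing for this predicate. */
	assert(S_ISLNK(mode));
	assert(S_ISLNK(mode & S_IFMT));

	printf("S_ISLNK masks internally: %d == %d\n",
	       S_ISLNK(mode), S_ISLNK(mode & S_IFMT));
	return 0;
}
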
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 4496872..9cbd11a 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -3161,7 +3161,7 @@
 {
 	int rc;
 	int dbitno, word, rembits, nb, nwords, wbitno, agno;
-	s8 oldroot, *leaf;
+	s8 oldroot;
 	struct dmaptree *tp = (struct dmaptree *) & dp->tree;
 
 	/* save the current value of the root (i.e. maximum free string)
@@ -3169,9 +3169,6 @@
 	 */
 	oldroot = tp->stree[ROOT];
 
-	/* pick up a pointer to the leaves of the dmap tree */
-	leaf = tp->stree + LEAFIND;
-
 	/* determine the bit number and word within the dmap of the
 	 * starting block.
 	 */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f6cc0c0..af96060 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1143,7 +1143,6 @@
 	struct jfs_log *log;
 	struct tblock *tblk;
 	struct lrd *lrd;
-	int lsn;
 	struct inode *ip;
 	struct jfs_inode_info *jfs_ip;
 	int k, n;
@@ -1310,7 +1309,7 @@
 	 */
 	lrd->type = cpu_to_le16(LOG_COMMIT);
 	lrd->length = 0;
-	lsn = lmLog(log, tblk, lrd, NULL);
+	lmLog(log, tblk, lrd, NULL);
 
 	lmGroupCommit(log, tblk);
 
@@ -2935,7 +2934,6 @@
 {
 	struct inode *ip;
 	struct jfs_inode_info *jfs_ip;
-	int rc;
 	tid_t tid;
 
 	do {
@@ -2961,7 +2959,7 @@
 				 */
 				TXN_UNLOCK();
 				tid = txBegin(ip->i_sb, COMMIT_INODE);
-				rc = txCommit(tid, 1, &ip, 0);
+				txCommit(tid, 1, &ip, 0);
 				txEnd(tid);
 				mutex_unlock(&jfs_ip->commit_mutex);
 
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 29b1f1a..e17545e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -893,7 +893,7 @@
 	unchar *i_fastsymlink;
 	s64 xlen = 0;
 	int bmask = 0, xsize;
-	s64 extent = 0, xaddr;
+	s64 xaddr;
 	struct metapage *mp;
 	struct super_block *sb;
 	struct tblock *tblk;
@@ -993,7 +993,6 @@
 			txAbort(tid, 0);
 			goto out3;
 		}
-		extent = xaddr;
 		ip->i_size = ssize - 1;
 		while (ssize) {
 			/* This is kind of silly since PATH_MAX == 4K */
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e374050..8392cb8 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -302,7 +302,8 @@
 				/* We appear to be out of the grace period */
 				wake_up_all(&host->h_gracewait);
 			}
-			dprintk("lockd: server returns status %d\n", resp->status);
+			dprintk("lockd: server returns status %d\n",
+				ntohl(resp->status));
 			return 0;	/* Okay, call complete */
 		}
 
@@ -690,7 +691,8 @@
 		goto out;
 
 	if (resp->status != nlm_lck_denied_nolocks)
-		printk("lockd: unexpected unlock status: %d\n", resp->status);
+		printk("lockd: unexpected unlock status: %d\n",
+			ntohl(resp->status));
 	/* What to do now? I'm out of my depth... */
 	status = -ENOLCK;
 out:
@@ -843,6 +845,7 @@
 		return -ENOLCK;
 #endif
 	}
-	printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
+	printk(KERN_NOTICE "lockd: unexpected server status %d\n",
+		 ntohl(status));
 	return -ENOLCK;
 }
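
NLM status codes travel as big-endian 32-bit values (__be32), so printing them raw yields byte-swapped numbers on little-endian machines; the added ntohl() calls make the log output match the on-the-wire status. A self-contained illustration:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend the server returned status 4; on the wire and in
	 * memory it is big-endian. */
	uint32_t wire_status = htonl(4);

	printf("raw:   %u\n", wire_status);		/* 67108864 on LE */
	printf("ntohl: %u\n", ntohl(wire_status));	/* 4 everywhere */
	return 0;
}
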
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 8151554..2cde5d9 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,6 +77,7 @@
 config NFS_V4_1
 	bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
 	depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+	select SUNRPC_BACKCHANNEL
 	select PNFS_FILE_LAYOUT
 	help
 	  This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954..74780f9 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@
 static u32 initiate_file_draining(struct nfs_client *clp,
 				  struct cb_layoutrecallargs *args)
 {
+	struct nfs_server *server;
 	struct pnfs_layout_hdr *lo;
 	struct inode *ino;
 	bool found = false;
@@ -118,21 +119,28 @@
 	LIST_HEAD(free_me_list);
 
 	spin_lock(&clp->cl_lock);
-	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
-		if (nfs_compare_fh(&args->cbl_fh,
-				   &NFS_I(lo->plh_inode)->fh))
-			continue;
-		ino = igrab(lo->plh_inode);
-		if (!ino)
-			continue;
-		found = true;
-		/* Without this, layout can be freed as soon
-		 * as we release cl_lock.
-		 */
-		get_layout_hdr(lo);
-		break;
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		list_for_each_entry(lo, &server->layouts, plh_layouts) {
+			if (nfs_compare_fh(&args->cbl_fh,
+					   &NFS_I(lo->plh_inode)->fh))
+				continue;
+			ino = igrab(lo->plh_inode);
+			if (!ino)
+				continue;
+			found = true;
+			/* Without this, layout can be freed as soon
+			 * as we release cl_lock.
+			 */
+			get_layout_hdr(lo);
+			break;
+		}
+		if (found)
+			break;
 	}
+	rcu_read_unlock();
 	spin_unlock(&clp->cl_lock);
+
 	if (!found)
 		return NFS4ERR_NOMATCHING_LAYOUT;
 
@@ -154,6 +162,7 @@
 static u32 initiate_bulk_draining(struct nfs_client *clp,
 				  struct cb_layoutrecallargs *args)
 {
+	struct nfs_server *server;
 	struct pnfs_layout_hdr *lo;
 	struct inode *ino;
 	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@
 	};
 
 	spin_lock(&clp->cl_lock);
-	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 		if ((args->cbl_recall_type == RETURN_FSID) &&
-		    memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
-			   &args->cbl_fsid, sizeof(struct nfs_fsid)))
+		    memcmp(&server->fsid, &args->cbl_fsid,
+			   sizeof(struct nfs_fsid)))
 			continue;
-		if (!igrab(lo->plh_inode))
-			continue;
-		get_layout_hdr(lo);
-		BUG_ON(!list_empty(&lo->plh_bulk_recall));
-		list_add(&lo->plh_bulk_recall, &recall_list);
+
+		list_for_each_entry(lo, &server->layouts, plh_layouts) {
+			if (!igrab(lo->plh_inode))
+				continue;
+			get_layout_hdr(lo);
+			BUG_ON(!list_empty(&lo->plh_bulk_recall));
+			list_add(&lo->plh_bulk_recall, &recall_list);
+		}
 	}
+	rcu_read_unlock();
 	spin_unlock(&clp->cl_lock);
+
 	list_for_each_entry_safe(lo, tmp,
 				 &recall_list, plh_bulk_recall) {
 		ino = lo->plh_inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b8..19ea7d9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@
 	cred = rpc_lookup_machine_cred();
 	if (!IS_ERR(cred))
 		clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
-	INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
 	nfs_fscache_get_client_cookie(clp);
 
 	return clp;
@@ -293,6 +290,7 @@
 	nfs4_deviceid_purge_client(clp);
 
 	kfree(clp->cl_hostname);
+	kfree(clp->server_scope);
 	kfree(clp);
 
 	dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@
 	INIT_LIST_HEAD(&server->client_link);
 	INIT_LIST_HEAD(&server->master_link);
 	INIT_LIST_HEAD(&server->delegations);
+	INIT_LIST_HEAD(&server->layouts);
 
 	atomic_set(&server->active, 0);
 
@@ -1464,7 +1463,7 @@
 	dprintk("<-- %s %p\n", __func__, clp);
 	return clp;
 }
-EXPORT_SYMBOL(nfs4_set_ds_client);
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
 
 /*
  * Session has been established, and the client marked ready.
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2a..321a66b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@
 	return err;
 }
 
-static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_server *server,
+		struct nfs_delegation *delegation)
 {
-	struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
-
 	set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
-	set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+	set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
 }
 
 /**
@@ -441,7 +440,7 @@
 		if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
 			continue;
 		if (delegation->type & flags)
-			nfs_mark_return_delegation(delegation);
+			nfs_mark_return_delegation(server, delegation);
 	}
 }
 
@@ -508,7 +507,7 @@
 	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
 			continue;
-		nfs_mark_return_delegation(delegation);
+		nfs_mark_return_delegation(server, delegation);
 	}
 }
 
@@ -539,7 +538,8 @@
 int nfs_async_inode_return_delegation(struct inode *inode,
 				      const nfs4_stateid *stateid)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_delegation *delegation;
 
 	rcu_read_lock();
@@ -549,7 +549,7 @@
 		rcu_read_unlock();
 		return -ENOENT;
 	}
-	nfs_mark_return_delegation(delegation);
+	nfs_mark_return_delegation(server, delegation);
 	rcu_read_unlock();
 
 	nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347..ab12913 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,6 +277,9 @@
 extern char *nfs_path(char **p, struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 extern struct vfsmount *nfs_d_automount(struct path *path);
+#ifdef CONFIG_NFS_V4
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+#endif
 
 /* getroot.c */
 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
+struct nfs_pageio_descriptor;
 /* read.c */
 extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
 			     const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+		struct list_head *head);
+
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+		struct list_head *head);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_write_data *p);
 extern int nfs_initiate_write(struct nfs_write_data *data,
 			      struct rpc_clnt *clnt,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1f063ba..8102391 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@
 }
 
 #ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
 {
 	struct gss_api_mech *mech;
 	struct xdr_netobj oid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b788f2e..1909ee8 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
 	NFS4CLNT_LEASE_CONFIRM,
+	NFS4CLNT_SERVER_SCOPE_MISMATCH,
 };
 
 enum nfs4_session_state {
@@ -66,6 +67,8 @@
 			int cache_reply);
 	int	(*validate_stateid)(struct nfs_delegation *,
 			const nfs4_stateid *);
+	int	(*find_root_sec)(struct nfs_server *, struct nfs_fh *,
+			struct nfs_fsinfo *);
 	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
 	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
 	const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@
 extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+				      struct server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03ab..be93a62 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -334,6 +334,9 @@
 		__func__, data->inode->i_ino,
 		data->args.pgbase, (size_t)data->args.count, offset);
 
+	if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+		return PNFS_NOT_ATTEMPTED;
+
 	/* Retrieve the correct rpc_client for the byte range */
 	j = nfs4_fl_calc_j_index(lseg, offset);
 	idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@
 		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
 		return PNFS_NOT_ATTEMPTED;
 	}
-	dprintk("%s USE DS:ip %x %hu\n", __func__,
-		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+	dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
 
 	/* No multipath support. Use first DS */
 	data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@
 	struct nfs_fh *fh;
 	int status;
 
+	if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+		return PNFS_NOT_ATTEMPTED;
+
 	/* Retrieve the correct rpc_client for the byte range */
 	j = nfs4_fl_calc_j_index(lseg, offset);
 	idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@
 		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
 		return PNFS_NOT_ATTEMPTED;
 	}
-	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
 		data->inode->i_ino, sync, (size_t) data->args.count, offset,
-		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+		ds->ds_remotestr);
 
 	data->write_done_cb = filelayout_write_done_cb;
 	data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@
 
 	dprintk("--> %s\n", __func__);
 
+	/* FIXME: remove this check when layout segment support is added */
+	if (lgr->range.offset != 0 ||
+	    lgr->range.length != NFS4_MAX_UINT64) {
+		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
+			__func__);
+		goto out;
+	}
+
 	if (fl->pattern_offset > lgr->range.offset) {
 		dprintk("%s pattern_offset %lld too large\n",
 				__func__, fl->pattern_offset);
@@ -449,6 +462,10 @@
 			goto out;
 	} else
 		dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+	/* Found deviceid is being reaped */
+	if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+			goto out_put;
+
 	fl->dsaddr = dsaddr;
 
 	if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@
  * return true  : coalesce page
  * return false : don't coalesce page
  */
-bool
+static bool
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		   struct nfs_page *req)
 {
@@ -670,8 +687,6 @@
 	    !nfs_generic_pg_test(pgio, prev, req))
 		return false;
 
-	if (!pgio->pg_lseg)
-		return 1;
 	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
 	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
 	stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@
 	return (p_stripe == r_stripe);
 }
 
+void
+filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+			struct nfs_page *req)
+{
+	BUG_ON(pgio->pg_lseg != NULL);
+
+	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+					   req->wb_context,
+					   0,
+					   NFS4_MAX_UINT64,
+					   IOMODE_READ,
+					   GFP_KERNEL);
+	/* If no lseg, fall back to read through mds */
+	if (pgio->pg_lseg == NULL)
+		nfs_pageio_reset_read_mds(pgio);
+}
+
+void
+filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+			 struct nfs_page *req)
+{
+	BUG_ON(pgio->pg_lseg != NULL);
+
+	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+					   req->wb_context,
+					   0,
+					   NFS4_MAX_UINT64,
+					   IOMODE_RW,
+					   GFP_NOFS);
+	/* If no lseg, fall back to write through mds */
+	if (pgio->pg_lseg == NULL)
+		nfs_pageio_reset_write_mds(pgio);
+}
+
+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+	.pg_init = filelayout_pg_init_read,
+	.pg_test = filelayout_pg_test,
+	.pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+	.pg_init = filelayout_pg_init_write,
+	.pg_test = filelayout_pg_test,
+	.pg_doio = pnfs_generic_pg_writepages,
+};
+
 static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
 {
 	return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@
 	.owner			= THIS_MODULE,
 	.alloc_lseg		= filelayout_alloc_lseg,
 	.free_lseg		= filelayout_free_lseg,
-	.pg_test		= filelayout_pg_test,
+	.pg_read_ops		= &filelayout_pg_read_ops,
+	.pg_write_ops		= &filelayout_pg_write_ops,
 	.mark_pnfs_commit	= filelayout_mark_pnfs_commit,
 	.choose_commit_list	= filelayout_choose_commit_list,
 	.commit_pagelist	= filelayout_commit_pagelist,
@@ -902,5 +964,7 @@
 	pnfs_unregister_layoutdriver(&filelayout_type);
 }
 
+MODULE_ALIAS("nfs-layouttype4-1");
+
 module_init(nfs4filelayout_init);
 module_exit(nfs4filelayout_exit);
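
The layout driver now hands the pageio layer complete nfs_pageio_ops tables instead of a lone pg_test hook, letting pg_init attempt pnfs_update_layout() and reset the descriptor to the MDS path when no layout segment is granted. A generic sketch of that ops-table-with-fallback shape (hypothetical types, not the NFS ones):

#include <stddef.h>
#include <stdio.h>

struct pgio;

struct pgio_ops {
	void (*pg_init)(struct pgio *);
	int (*pg_doio)(struct pgio *);
};

struct pgio {
	const struct pgio_ops *ops;
	void *lseg;		/* layout segment, NULL if none granted */
};

static int mds_doio(struct pgio *p)
{
	printf("I/O through the MDS\n");
	return 0;
}

static const struct pgio_ops mds_ops = { .pg_doio = mds_doio };

static int ds_doio(struct pgio *p)
{
	printf("I/O straight to the data server\n");
	return 0;
}

/* pg_init analogue: try to get a layout; on failure, reset the
 * descriptor to the MDS ops, like nfs_pageio_reset_read_mds(). */
static void layout_init(struct pgio *p)
{
	p->lseg = NULL;		/* pretend pnfs_update_layout() failed */
	if (!p->lseg)
		p->ops = &mds_ops;
}

static const struct pgio_ops layout_ops = {
	.pg_init = layout_init,
	.pg_doio = ds_doio,
};

int main(void)
{
	struct pgio p = { .ops = &layout_ops };

	if (p.ops->pg_init)
		p.ops->pg_init(&p);
	return p.ops->pg_doio(&p);
}
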
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e..2e42284 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@
 };
 
 /* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+	struct sockaddr_storage	da_addr;
+	size_t			da_addrlen;
+	struct list_head	da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
+	char			*da_remotestr;	/* human readable addr+port */
+};
+
 struct nfs4_pnfs_ds {
 	struct list_head	ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
-	u32			ds_ip_addr;
-	u32			ds_port;
+	char			*ds_remotestr;	/* comma sep list of addrs */
+	struct list_head	ds_addrs;
 	struct nfs_client	*ds_clp;
 	atomic_t		ds_count;
 };
@@ -89,6 +96,12 @@
 			    generic_hdr);
 }
 
+static inline struct nfs4_deviceid_node *
+FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
+{
+	return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
+}
+
 extern struct nfs_fh *
 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
 
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf13..ed388aa 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@
 		printk("%s NULL device\n", __func__);
 		return;
 	}
-	printk("        ip_addr %x port %hu\n"
+	printk("        ds %s\n"
 		"        ref count %d\n"
 		"        client %p\n"
 		"        cl_exchange_flags %x\n",
-		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+		ds->ds_remotestr,
 		atomic_read(&ds->ds_count), ds->ds_clp,
 		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 }
 
-/* nfs4_ds_cache_lock is held */
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
+{
+	struct sockaddr_in *a, *b;
+	struct sockaddr_in6 *a6, *b6;
+
+	if (addr1->sa_family != addr2->sa_family)
+		return false;
+
+	switch (addr1->sa_family) {
+	case AF_INET:
+		a = (struct sockaddr_in *)addr1;
+		b = (struct sockaddr_in *)addr2;
+
+		if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+		    a->sin_port == b->sin_port)
+			return true;
+		break;
+
+	case AF_INET6:
+		a6 = (struct sockaddr_in6 *)addr1;
+		b6 = (struct sockaddr_in6 *)addr2;
+
+		/* LINKLOCAL addresses must have matching scope_id */
+		if (ipv6_addr_scope(&a6->sin6_addr) ==
+		    IPV6_ADDR_SCOPE_LINKLOCAL &&
+		    a6->sin6_scope_id != b6->sin6_scope_id)
+			return false;
+
+		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+		    a6->sin6_port == b6->sin6_port)
+			return true;
+		break;
+
+	default:
+		dprintk("%s: unhandled address family: %u\n",
+			__func__, addr1->sa_family);
+		return false;
+	}
+
+	return false;
+}
+
+/*
+ * Look up a DS by its addresses.  Returns the first cached DS that
+ * matches any of the given addresses.  nfs4_ds_cache_lock must be held.
+ */
 static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+_data_server_lookup_locked(struct list_head *dsaddrs)
 {
 	struct nfs4_pnfs_ds *ds;
+	struct nfs4_pnfs_ds_addr *da1, *da2;
 
-	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
-			ntohl(ip_addr), ntohs(port));
-
-	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
-		if (ds->ds_ip_addr == ip_addr &&
-		    ds->ds_port == port) {
-			return ds;
+	list_for_each_entry(da1, dsaddrs, da_node) {
+		list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+			list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+				if (same_sockaddr(
+					(struct sockaddr *)&da1->da_addr,
+					(struct sockaddr *)&da2->da_addr))
+					return ds;
+			}
 		}
 	}
 	return NULL;
 }
 
 /*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+				    struct list_head *dsaddrs2)
+{
+	struct nfs4_pnfs_ds_addr *da1, *da2;
+	size_t count1 = 0,
+	       count2 = 0;
+
+	list_for_each_entry(da1, dsaddrs1, da_node)
+		count1++;
+
+	list_for_each_entry(da2, dsaddrs2, da_node) {
+		bool found = false;
+		count2++;
+		list_for_each_entry(da1, dsaddrs1, da_node) {
+			if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+				(struct sockaddr *)&da2->da_addr)) {
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			return false;
+	}
+
+	return (count1 == count2);
+}
+
+/*
  * Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
  */
 static int
 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
 {
-	struct nfs_client *clp;
-	struct sockaddr_in sin;
+	struct nfs_client *clp = ERR_PTR(-EIO);
+	struct nfs4_pnfs_ds_addr *da;
 	int status = 0;
 
-	dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
-		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
 		mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = ds->ds_ip_addr;
-	sin.sin_port = ds->ds_port;
+	BUG_ON(list_empty(&ds->ds_addrs));
 
-	clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
-				 sizeof(sin), IPPROTO_TCP);
+	list_for_each_entry(da, &ds->ds_addrs, da_node) {
+		dprintk("%s: DS %s: trying address %s\n",
+			__func__, ds->ds_remotestr, da->da_remotestr);
+
+		clp = nfs4_set_ds_client(mds_srv->nfs_client,
+				 (struct sockaddr *)&da->da_addr,
+				 da->da_addrlen, IPPROTO_TCP);
+		if (!IS_ERR(clp))
+			break;
+	}
+
 	if (IS_ERR(clp)) {
 		status = PTR_ERR(clp);
 		goto out;
@@ -115,8 +200,8 @@
 			goto out_put;
 		}
 		ds->ds_clp = clp;
-		dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
-			ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+		dprintk("%s [existing] server=%s\n", __func__,
+			ds->ds_remotestr);
 		goto out;
 	}
 
@@ -135,8 +220,7 @@
 		goto out_put;
 
 	ds->ds_clp = clp;
-	dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
-		ntohs(ds->ds_port));
+	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 out:
 	return status;
 out_put:
@@ -147,12 +231,25 @@
 static void
 destroy_ds(struct nfs4_pnfs_ds *ds)
 {
+	struct nfs4_pnfs_ds_addr *da;
+
 	dprintk("--> %s\n", __func__);
 	ifdebug(FACILITY)
 		print_ds(ds);
 
 	if (ds->ds_clp)
 		nfs_put_client(ds->ds_clp);
+
+	while (!list_empty(&ds->ds_addrs)) {
+		da = list_first_entry(&ds->ds_addrs,
+				      struct nfs4_pnfs_ds_addr,
+				      da_node);
+		list_del_init(&da->da_node);
+		kfree(da->da_remotestr);
+		kfree(da);
+	}
+
+	kfree(ds->ds_remotestr);
 	kfree(ds);
 }
 
@@ -179,31 +276,96 @@
 	kfree(dsaddr);
 }
 
-static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+/*
+ * Create a string with a human-readable address and port to avoid
+ * complicated setup around many dprintks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
 {
-	struct nfs4_pnfs_ds *tmp_ds, *ds;
+	struct nfs4_pnfs_ds_addr *da;
+	char *remotestr;
+	size_t len;
+	char *p;
 
-	ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+	len = 3;        /* '{', '}' and trailing NUL */
+	list_for_each_entry(da, dsaddrs, da_node) {
+		len += strlen(da->da_remotestr) + 1;    /* string plus comma */
+	}
+
+	remotestr = kzalloc(len, gfp_flags);
+	if (!remotestr)
+		return NULL;
+
+	p = remotestr;
+	*(p++) = '{';
+	len--;
+	list_for_each_entry(da, dsaddrs, da_node) {
+		size_t ll = strlen(da->da_remotestr);
+
+		if (ll > len)
+			goto out_err;
+
+		memcpy(p, da->da_remotestr, ll);
+		p += ll;
+		len -= ll;
+
+		if (len < 1)
+			goto out_err;
+		(*p++) = ',';
+		len--;
+	}
+	if (len < 2)
+		goto out_err;
+	*(p++) = '}';
+	*p = '\0';
+	return remotestr;
+out_err:
+	kfree(remotestr);
+	return NULL;
+}
+
+static struct nfs4_pnfs_ds *
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+{
+	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+	char *remotestr;
+
+	if (list_empty(dsaddrs)) {
+		dprintk("%s: no addresses defined\n", __func__);
+		goto out;
+	}
+
+	ds = kzalloc(sizeof(*ds), gfp_flags);
 	if (!ds)
 		goto out;
 
+	/* this is only used for debugging, so it's OK if it's NULL */
+	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
+
 	spin_lock(&nfs4_ds_cache_lock);
-	tmp_ds = _data_server_lookup_locked(ip_addr, port);
+	tmp_ds = _data_server_lookup_locked(dsaddrs);
 	if (tmp_ds == NULL) {
-		ds->ds_ip_addr = ip_addr;
-		ds->ds_port = port;
+		INIT_LIST_HEAD(&ds->ds_addrs);
+		list_splice_init(dsaddrs, &ds->ds_addrs);
+		ds->ds_remotestr = remotestr;
 		atomic_set(&ds->ds_count, 1);
 		INIT_LIST_HEAD(&ds->ds_node);
 		ds->ds_clp = NULL;
 		list_add(&ds->ds_node, &nfs4_data_server_cache);
-		dprintk("%s add new data server ip 0x%x\n", __func__,
-			ds->ds_ip_addr);
+		dprintk("%s add new data server %s\n", __func__,
+			ds->ds_remotestr);
 	} else {
+		if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+							 dsaddrs)) {
+			dprintk("%s: multipath address mismatch: %s != %s\n",
+				__func__, tmp_ds->ds_remotestr, remotestr);
+		}
+		kfree(remotestr);
 		kfree(ds);
 		atomic_inc(&tmp_ds->ds_count);
-		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
-			__func__, tmp_ds->ds_ip_addr,
+		dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+			__func__, tmp_ds->ds_remotestr,
 			atomic_read(&tmp_ds->ds_count));
 		ds = tmp_ds;
 	}
@@ -213,18 +375,22 @@
 }
 
 /*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports IPv4, IPv6, and one multipath address.
  */
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
 {
-	struct nfs4_pnfs_ds *ds = NULL;
-	char *buf;
-	const char *ipend, *pstr;
-	u32 ip_addr, port;
-	int nlen, rlen, i;
+	struct nfs4_pnfs_ds_addr *da = NULL;
+	char *buf, *portstr;
+	u32 port;
+	int nlen, rlen;
 	int tmp[2];
 	__be32 *p;
+	char *netid, *match_netid;
+	size_t len, match_netid_len;
+	char *startsep = "";
+	char *endsep = "";
 
 	/* r_netid */
 	p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@
 	if (unlikely(!p))
 		goto out_err;
 
-	/* Check that netid is "tcp" */
-	if (nlen != 3 ||  memcmp((char *)p, "tcp", 3)) {
-		dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+	netid = kmalloc(nlen+1, gfp_flags);
+	if (unlikely(!netid))
 		goto out_err;
-	}
 
-	/* r_addr */
+	netid[nlen] = '\0';
+	memcpy(netid, p, nlen);
+
+	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 	p = xdr_inline_decode(streamp, 4);
 	if (unlikely(!p))
-		goto out_err;
+		goto out_free_netid;
 	rlen = be32_to_cpup(p);
 
 	p = xdr_inline_decode(streamp, rlen);
 	if (unlikely(!p))
-		goto out_err;
+		goto out_free_netid;
 
-	/* ipv6 length plus port is legal */
-	if (rlen > INET6_ADDRSTRLEN + 8) {
+	/* port is ".ABC.DEF", 8 chars max */
+	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
 		dprintk("%s: Invalid address, length %d\n", __func__,
 			rlen);
-		goto out_err;
+		goto out_free_netid;
 	}
 	buf = kmalloc(rlen + 1, gfp_flags);
 	if (!buf) {
 		dprintk("%s: Not enough memory\n", __func__);
-		goto out_err;
+		goto out_free_netid;
 	}
 	buf[rlen] = '\0';
 	memcpy(buf, p, rlen);
 
-	/* replace the port dots with dashes for the in4_pton() delimiter*/
-	for (i = 0; i < 2; i++) {
-		char *res = strrchr(buf, '.');
-		if (!res) {
-			dprintk("%s: Failed finding expected dots in port\n",
-				__func__);
-			goto out_free;
-		}
-		*res = '-';
+	/* replace port '.' with '-' */
+	portstr = strrchr(buf, '.');
+	if (!portstr) {
+		dprintk("%s: Failed finding expected dot in port\n",
+			__func__);
+		goto out_free_buf;
+	}
+	*portstr = '-';
+
+	/* find '.' between address and port */
+	portstr = strrchr(buf, '.');
+	if (!portstr) {
+		dprintk("%s: Failed finding expected dot between address and "
+			"port\n", __func__);
+		goto out_free_buf;
+	}
+	*portstr = '\0';
+
+	da = kzalloc(sizeof(*da), gfp_flags);
+	if (unlikely(!da))
+		goto out_free_buf;
+
+	INIT_LIST_HEAD(&da->da_node);
+
+	if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+		      sizeof(da->da_addr))) {
+		dprintk("%s: error parsing address %s\n", __func__, buf);
+		goto out_free_da;
 	}
 
-	/* Currently only support ipv4 address */
-	if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
-		dprintk("%s: Only ipv4 addresses supported\n", __func__);
-		goto out_free;
-	}
-
-	/* port */
-	pstr = ipend;
-	sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
+	portstr++;
+	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
 	port = htons((tmp[0] << 8) | (tmp[1]));
 
-	ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
-	dprintk("%s: Decoded address and port %s\n", __func__, buf);
-out_free:
+	switch (da->da_addr.ss_family) {
+	case AF_INET:
+		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
+		da->da_addrlen = sizeof(struct sockaddr_in);
+		match_netid = "tcp";
+		match_netid_len = 3;
+		break;
+
+	case AF_INET6:
+		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
+		da->da_addrlen = sizeof(struct sockaddr_in6);
+		match_netid = "tcp6";
+		match_netid_len = 4;
+		startsep = "[";
+		endsep = "]";
+		break;
+
+	default:
+		dprintk("%s: unsupported address family: %u\n",
+			__func__, da->da_addr.ss_family);
+		goto out_free_da;
+	}
+
+	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
+		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
+			__func__, netid, match_netid);
+		goto out_free_da;
+	}
+
+	/* save human-readable address */
+	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
+	da->da_remotestr = kzalloc(len, gfp_flags);
+
+	/* NULL is ok, only used for dprintk */
+	if (da->da_remotestr)
+		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
+			 buf, endsep, ntohs(port));
+
+	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 	kfree(buf);
+	kfree(netid);
+	return da;
+
+out_free_da:
+	kfree(da);
+out_free_buf:
+	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
+	kfree(buf);
+out_free_netid:
+	kfree(netid);
 out_err:
-	return ds;
+	return NULL;
 }
 
 /* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@
 	struct xdr_stream stream;
 	struct xdr_buf buf;
 	struct page *scratch;
+	struct list_head dsaddrs;
+	struct nfs4_pnfs_ds_addr *da;
 
 	/* set up xdr stream */
 	scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@
 				NFS_SERVER(ino)->nfs_client,
 				&pdev->dev_id);
 
+	INIT_LIST_HEAD(&dsaddrs);
+
 	for (i = 0; i < dsaddr->ds_num; i++) {
 		int j;
 		u32 mp_count;
@@ -395,48 +624,43 @@
 			goto out_err_free_deviceid;
 
 		mp_count = be32_to_cpup(p); /* multipath count */
-		if (mp_count > 1) {
-			printk(KERN_WARNING
-			       "%s: Multipath count %d not supported, "
-			       "skipping all greater than 1\n", __func__,
-				mp_count);
-		}
 		for (j = 0; j < mp_count; j++) {
-			if (j == 0) {
-				dsaddr->ds_list[i] = decode_and_add_ds(&stream,
-					ino, gfp_flags);
-				if (dsaddr->ds_list[i] == NULL)
-					goto out_err_free_deviceid;
-			} else {
-				u32 len;
-				/* skip extra multipath */
+			da = decode_ds_addr(&stream, gfp_flags);
+			if (da)
+				list_add_tail(&da->da_node, &dsaddrs);
+		}
+		if (list_empty(&dsaddrs)) {
+			dprintk("%s: no suitable DS addresses found\n",
+				__func__);
+			goto out_err_free_deviceid;
+		}
 
-				/* read len, skip */
-				p = xdr_inline_decode(&stream, 4);
-				if (unlikely(!p))
-					goto out_err_free_deviceid;
-				len = be32_to_cpup(p);
+		dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+		if (!dsaddr->ds_list[i])
+			goto out_err_drain_dsaddrs;
 
-				p = xdr_inline_decode(&stream, len);
-				if (unlikely(!p))
-					goto out_err_free_deviceid;
-
-				/* read len, skip */
-				p = xdr_inline_decode(&stream, 4);
-				if (unlikely(!p))
-					goto out_err_free_deviceid;
-				len = be32_to_cpup(p);
-
-				p = xdr_inline_decode(&stream, len);
-				if (unlikely(!p))
-					goto out_err_free_deviceid;
-			}
+		/* If DS was already in cache, free ds addrs */
+		while (!list_empty(&dsaddrs)) {
+			da = list_first_entry(&dsaddrs,
+					      struct nfs4_pnfs_ds_addr,
+					      da_node);
+			list_del_init(&da->da_node);
+			kfree(da->da_remotestr);
+			kfree(da);
 		}
 	}
 
 	__free_page(scratch);
 	return dsaddr;
 
+out_err_drain_dsaddrs:
+	while (!list_empty(&dsaddrs)) {
+		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
+				      da_node);
+		list_del_init(&da->da_node);
+		kfree(da->da_remotestr);
+		kfree(da);
+	}
 out_err_free_deviceid:
 	nfs4_fl_free_deviceid(dsaddr);
 	/* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@
 
 static void
 filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
-			       int err, u32 ds_addr)
+			       int err, const char *ds_remotestr)
 {
 	u32 *p = (u32 *)&dsaddr->id_node.deviceid;
 
-	printk(KERN_ERR "NFS: data server %x connection error %d."
+	printk(KERN_ERR "NFS: data server %s connection error %d."
 		" Deviceid [%x%x%x%x] marked out of use.\n",
-		ds_addr, err, p[0], p[1], p[2], p[3]);
+		ds_remotestr, err, p[0], p[1], p[2], p[3]);
 
 	spin_lock(&nfs4_ds_cache_lock);
 	dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@
 		err = nfs4_ds_connect(s, ds);
 		if (err) {
 			filelayout_mark_devid_negative(dsaddr, err,
-						       ntohl(ds->ds_ip_addr));
+						       ds->ds_remotestr);
 			return NULL;
 		}
 	}
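
decode_ds_addr() parses the universal address form from RFC 5665, in which the port is appended to the address as two decimal octets ("addr.p1.p2") and reassembled as (p1 << 8) | p2. A standalone parser for just that tail, mirroring the strrchr()/sscanf() steps above:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[] = "192.0.2.7.8.1";	/* r_addr: host 192.0.2.7, port octets 8.1 */
	char *dot;
	int hi, lo;

	/* The last dot separates the two port octets... */
	dot = strrchr(buf, '.');
	*dot = '-';
	/* ...and the next-to-last dot separates address from port. */
	dot = strrchr(buf, '.');
	*dot = '\0';

	sscanf(dot + 1, "%d-%d", &hi, &lo);
	printf("address %s, port %d\n", buf, (hi << 8) | lo);	/* 2049 */
	return 0;
}
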
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 26bece8..079614d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -80,7 +80,10 @@
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 			    struct nfs_fattr *fattr, struct iattr *sattr,
 			    struct nfs4_state *state);
-
+#ifdef CONFIG_NFS_V4_1
+static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+#endif
 /* Prevent leaks of NFSv4 errors into userland */
 static int nfs4_map_errors(int err)
 {
@@ -1689,6 +1692,20 @@
 	return ret;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	int status;
+	struct nfs_server *server = NFS_SERVER(state->inode);
+
+	status = nfs41_test_stateid(server, state);
+	if (status == NFS_OK)
+		return 0;
+	nfs41_free_stateid(server, state);
+	return nfs4_open_expired(sp, state);
+}
+#endif
+
 /*
  * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
  * fields corresponding to attributes that were used to store the verifier.
@@ -2252,13 +2269,14 @@
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 			      struct nfs_fsinfo *info)
 {
+	int minor_version = server->nfs_client->cl_minorversion;
 	int status = nfs4_lookup_root(server, fhandle, info);
 	if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
 		/*
 		 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
 		 * by nfs4_map_errors() as this function exits.
 		 */
-		status = nfs4_find_root_sec(server, fhandle, info);
+		status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
@@ -4441,6 +4459,20 @@
 	return err;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+	int status;
+	struct nfs_server *server = NFS_SERVER(state->inode);
+
+	status = nfs41_test_stateid(server, state);
+	if (status == NFS_OK)
+		return 0;
+	nfs41_free_stateid(server, state);
+	return nfs4_lock_expired(state, request);
+}
+#endif
+
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -4779,6 +4811,16 @@
 	return -NFS4ERR_INVAL;
 }
 
+static bool
+nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+{
+	if (a->server_scope_sz == b->server_scope_sz &&
+	    memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
+		return true;
+
+	return false;
+}
+
 /*
  * nfs4_proc_exchange_id()
  *
@@ -4821,9 +4863,31 @@
 				init_utsname()->domainname,
 				clp->cl_rpcclient->cl_auth->au_flavor);
 
+	res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
+	if (unlikely(!res.server_scope))
+		return -ENOMEM;
+
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	if (!status)
 		status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+
+	if (!status) {
+		if (clp->server_scope &&
+		    !nfs41_same_server_scope(clp->server_scope,
+					     res.server_scope)) {
+			dprintk("%s: server_scope mismatch detected\n",
+				__func__);
+			set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
+			kfree(clp->server_scope);
+			clp->server_scope = NULL;
+		}
+
+		if (!clp->server_scope)
+			clp->server_scope = res.server_scope;
+		else
+			kfree(res.server_scope);
+	}
+
 	dprintk("<-- %s status= %d\n", __func__, status);
 	return status;
 }
@@ -5704,7 +5768,7 @@
 {
 	struct nfs4_layoutreturn *lrp = calldata;
 	struct nfs_server *server;
-	struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+	struct pnfs_layout_hdr *lo = lrp->args.layout;
 
 	dprintk("--> %s\n", __func__);
 
@@ -5733,7 +5797,7 @@
 	struct nfs4_layoutreturn *lrp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	put_layout_hdr(NFS_I(lrp->args.inode)->layout);
+	put_layout_hdr(lrp->args.layout);
 	kfree(calldata);
 	dprintk("<-- %s\n", __func__);
 }
@@ -5901,6 +5965,143 @@
 	rpc_put_task(task);
 	return status;
 }
+
+static int
+_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+		    struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+	struct nfs41_secinfo_no_name_args args = {
+		.style = SECINFO_STYLE_CURRENT_FH,
+	};
+	struct nfs4_secinfo_res res = {
+		.flavors = flavors,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+}
+
+static int
+nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+			   struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+		switch (err) {
+		case 0:
+		case -NFS4ERR_WRONGSEC:
+		case -NFS4ERR_NOTSUPP:
+			break;
+		default:
+			err = nfs4_handle_exception(server, err, &exception);
+		}
+	} while (exception.retry);
+	return err;
+}
+
+static int
+nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
+		    struct nfs_fsinfo *info)
+{
+	int err;
+	struct page *page;
+	rpc_authflavor_t flavor;
+	struct nfs4_secinfo_flavors *flavors;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	flavors = page_address(page);
+	err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+
+	/*
+	 * Fall back on "guess and check" method if
+	 * the server doesn't support SECINFO_NO_NAME
+	 */
+	if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+		err = nfs4_find_root_sec(server, fhandle, info);
+		goto out_freepage;
+	}
+	if (err)
+		goto out_freepage;
+
+	flavor = nfs_find_best_sec(flavors);
+	if (err == 0)
+		err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+
+out_freepage:
+	put_page(page);
+	if (err == -EACCES)
+		return -EPERM;
+out:
+	return err;
+}
+
+static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+	int status;
+	struct nfs41_test_stateid_args args = {
+		.stateid = &state->stateid,
+	};
+	struct nfs41_test_stateid_res res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	args.seq_args.sa_session = res.seq_res.sr_session = NULL;
+	status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+	return status;
+}
+
+static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs41_test_stateid(server, state),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+	int status;
+	struct nfs41_free_stateid_args args = {
+		.stateid = &state->stateid,
+	};
+	struct nfs41_free_stateid_res res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	args.seq_args.sa_session = res.seq_res.sr_session = NULL;
+	status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+	return status;
+}
+
+static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_free_stateid(server, state),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5937,8 +6138,8 @@
 struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
 	.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
 	.state_flag_bit	= NFS_STATE_RECLAIM_NOGRACE,
-	.recover_open	= nfs4_open_expired,
-	.recover_lock	= nfs4_lock_expired,
+	.recover_open	= nfs41_open_expired,
+	.recover_lock	= nfs41_lock_expired,
 	.establish_clid = nfs41_init_clientid,
 	.get_clid_cred	= nfs4_get_exchange_id_cred,
 };
@@ -5962,6 +6163,7 @@
 	.minor_version = 0,
 	.call_sync = _nfs4_call_sync,
 	.validate_stateid = nfs4_validate_delegation_stateid,
+	.find_root_sec = nfs4_find_root_sec,
 	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
 	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
 	.state_renewal_ops = &nfs40_state_renewal_ops,
@@ -5972,6 +6174,7 @@
 	.minor_version = 1,
 	.call_sync = _nfs4_call_sync_session,
 	.validate_stateid = nfs41_validate_delegation_stateid,
+	.find_root_sec = nfs41_find_root_sec,
 	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
 	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
 	.state_renewal_ops = &nfs41_state_renewal_ops,
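
The new NFSv4.1 expired-state handlers take a cheap-check-first shape: TEST_STATEID asks the server whether the stateid is still valid, and only when it is not do they FREE_STATEID and fall through to the full open/lock reclaim. A generic sketch of that fast path; probe(), discard() and full_recover() are hypothetical stand-ins, not the NFS client API:

#include <stdio.h>

/* Hypothetical stand-ins for nfs41_test_stateid()/nfs41_free_stateid()
 * and the nfs4_open_expired() fallback. */
static int probe(int id)        { return id == 42 ? 0 : -1; }
static void discard(int id)     { printf("freed stale id %d\n", id); }
static int full_recover(int id) { printf("full recovery for %d\n", id); return 0; }

static int recover(int id)
{
	/* Fast path: the server still recognises the stateid; done. */
	if (probe(id) == 0)
		return 0;

	/* Slow path: drop the stale stateid, then do the full reclaim. */
	discard(id);
	return full_recover(id);
}

int main(void)
{
	recover(42);	/* fast path */
	recover(7);	/* slow path */
	return 0;
}
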
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7acfe88..72ab97e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1643,7 +1643,14 @@
 				goto out_error;
 			}
 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
-			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+
+			if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
+					       &clp->cl_state))
+				nfs4_state_start_reclaim_nograce(clp);
+			else
+				set_bit(NFS4CLNT_RECLAIM_REBOOT,
+					&clp->cl_state);
+
 			pnfs_destroy_all_layouts(clp);
 		}
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6e8f3b..c191a9b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -343,6 +343,14 @@
 				1 /* FIXME: opaque lrf_body always empty at the moment */)
 #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
 				1 + decode_stateid_maxsz)
+#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
+#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
+#define encode_test_stateid_maxsz	(op_encode_hdr_maxsz + 2 + \
+					 XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_test_stateid_maxsz	(op_decode_hdr_maxsz + 2 + 1)
+#define encode_free_stateid_maxsz	(op_encode_hdr_maxsz + 1 + \
+					 XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_free_stateid_maxsz	(op_decode_hdr_maxsz + 1)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -772,6 +780,26 @@
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_layoutreturn_maxsz)
+#define NFS4_enc_secinfo_no_name_sz	(compound_encode_hdr_maxsz + \
+					encode_sequence_maxsz + \
+					encode_putrootfh_maxsz +\
+					encode_secinfo_no_name_maxsz)
+#define NFS4_dec_secinfo_no_name_sz	(compound_decode_hdr_maxsz + \
+					decode_sequence_maxsz + \
+					decode_putrootfh_maxsz + \
+					decode_secinfo_no_name_maxsz)
+#define NFS4_enc_test_stateid_sz	(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_test_stateid_maxsz)
+#define NFS4_dec_test_stateid_sz	(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_test_stateid_maxsz)
+#define NFS4_enc_free_stateid_sz	(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_free_stateid_maxsz)
+#define NFS4_dec_free_stateid_sz	(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_free_stateid_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				      compound_encode_hdr_maxsz +
@@ -1938,6 +1966,46 @@
 	hdr->nops++;
 	hdr->replen += decode_layoutreturn_maxsz;
 }
+
+static int
+encode_secinfo_no_name(struct xdr_stream *xdr,
+		       const struct nfs41_secinfo_no_name_args *args,
+		       struct compound_hdr *hdr)
+{
+	__be32 *p;
+	p = reserve_space(xdr, 8);
+	*p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
+	*p++ = cpu_to_be32(args->style);
+	hdr->nops++;
+	hdr->replen += decode_secinfo_no_name_maxsz;
+	return 0;
+}
+
+static void encode_test_stateid(struct xdr_stream *xdr,
+				struct nfs41_test_stateid_args *args,
+				struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(OP_TEST_STATEID);
+	*p++ = cpu_to_be32(1);
+	xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+	hdr->nops++;
+	hdr->replen += decode_test_stateid_maxsz;
+}
+
+static void encode_free_stateid(struct xdr_stream *xdr,
+				struct nfs41_free_stateid_args *args,
+				struct compound_hdr *hdr)
+{
+	__be32 *p;
+	p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(OP_FREE_STATEID);
+	xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+	hdr->nops++;
+	hdr->replen += decode_free_stateid_maxsz;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2790,6 +2858,59 @@
 	encode_layoutreturn(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
+
+/*
+ * Encode SECINFO_NO_NAME request
+ */
+static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs41_secinfo_no_name_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_secinfo_no_name(xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
+/*
+ *  Encode TEST_STATEID request
+ */
+static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs41_test_stateid_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_test_stateid(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
+/*
+ *  Encode FREE_STATEID request
+ */
+static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs41_free_stateid_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_free_stateid(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4977,11 +5098,17 @@
 	if (unlikely(status))
 		return status;
 
-	/* Throw away server_scope */
+	/* Save server_scope */
 	status = decode_opaque_inline(xdr, &dummy, &dummy_str);
 	if (unlikely(status))
 		return status;
 
+	if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+		return -EIO;
+
+	memcpy(res->server_scope->server_scope, dummy_str, dummy);
+	res->server_scope->server_scope_sz = dummy;
+
 	/* Throw away Implementation id array */
 	status = decode_opaque_inline(xdr, &dummy, &dummy_str);
 	if (unlikely(status))
@@ -5322,6 +5449,55 @@
 	print_overflow_msg(__func__, xdr);
 	return -EIO;
 }
+
+static int decode_test_stateid(struct xdr_stream *xdr,
+			       struct nfs41_test_stateid_res *res)
+{
+	__be32 *p;
+	int status;
+	int num_res;
+
+	status = decode_op_hdr(xdr, OP_TEST_STATEID);
+	if (status)
+		return status;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	num_res = be32_to_cpup(p++);
+	if (num_res != 1)
+		goto out;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->status = be32_to_cpup(p++);
+	return res->status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+out:
+	return -EIO;
+}
+
+static int decode_free_stateid(struct xdr_stream *xdr,
+			       struct nfs41_free_stateid_res *res)
+{
+	__be32 *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_FREE_STATEID);
+	if (status)
+		return status;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->status = be32_to_cpup(p++);
+	return res->status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -6461,6 +6637,72 @@
 out:
 	return status;
 }
+
+/*
+ * Decode SECINFO_NO_NAME response
+ */
+static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					struct nfs4_secinfo_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putrootfh(xdr);
+	if (status)
+		goto out;
+	status = decode_secinfo(xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode TEST_STATEID response
+ */
+static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs41_test_stateid_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_test_stateid(xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode FREE_STATEID response
+ */
+static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs41_free_stateid_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_free_stateid(xdr, res);
+out:
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /**
@@ -6663,6 +6905,9 @@
 	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
 	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
 	PROC(LAYOUTRETURN,	enc_layoutreturn,	dec_layoutreturn),
+	PROC(SECINFO_NO_NAME,	enc_secinfo_no_name,	dec_secinfo_no_name),
+	PROC(TEST_STATEID,	enc_test_stateid,	dec_test_stateid),
+	PROC(FREE_STATEID,	enc_free_stateid,	dec_free_stateid),
 #endif /* CONFIG_NFS_V4_1 */
 };
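
Each new operation above carries paired encode_*_maxsz/decode_*_maxsz
constants counted in 4-byte XDR words; XDR_QUADLEN() rounds a byte length
such as NFS4_STATEID_SIZE up to whole words, which is why
encode_test_stateid_maxsz is the op header plus two words plus the stateid
quads.  A standalone sketch of the wire format those encoders produce:
big-endian words, with fixed-size opaques padded out to a word boundary (the
stream type here is a stand-in, not the kernel's struct xdr_stream):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define STATEID_SIZE	16
	#define QUADLEN(n)	(((n) + 3) >> 2)	/* bytes -> words, like XDR_QUADLEN */

	struct stream { uint8_t buf[64]; size_t pos; };

	static void put_u32(struct stream *s, uint32_t v)
	{
		s->buf[s->pos++] = v >> 24;
		s->buf[s->pos++] = v >> 16;
		s->buf[s->pos++] = v >> 8;
		s->buf[s->pos++] = v;
	}

	static void put_opaque_fixed(struct stream *s, const void *p, size_t len)
	{
		memcpy(s->buf + s->pos, p, len);
		s->pos += QUADLEN(len) * 4;	/* step over any pad bytes */
	}

	int main(void)
	{
		struct stream s = { .pos = 0 };
		uint8_t stateid[STATEID_SIZE] = { 0xab };

		put_u32(&s, 55);	/* op number; OP_TEST_STATEID is 55 in RFC 5661 */
		put_u32(&s, 1);		/* one stateid follows */
		put_opaque_fixed(&s, stateid, STATEID_SIZE);
		printf("encoded %zu bytes = %zu words\n", s.pos, s.pos / 4);
		return 0;
	}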
 
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3..9383ca7 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1000,13 +1000,22 @@
 	if (!pnfs_generic_pg_test(pgio, prev, req))
 		return false;
 
-	if (pgio->pg_lseg == NULL)
-		return true;
-
 	return pgio->pg_count + req->wb_bytes <=
 			OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
 }
 
+static const struct nfs_pageio_ops objio_pg_read_ops = {
+	.pg_init = pnfs_generic_pg_init_read,
+	.pg_test = objio_pg_test,
+	.pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops objio_pg_write_ops = {
+	.pg_init = pnfs_generic_pg_init_write,
+	.pg_test = objio_pg_test,
+	.pg_doio = pnfs_generic_pg_writepages,
+};
+
 static struct pnfs_layoutdriver_type objlayout_type = {
 	.id = LAYOUT_OSD2_OBJECTS,
 	.name = "LAYOUT_OSD2_OBJECTS",
@@ -1020,7 +1029,8 @@
 
 	.read_pagelist           = objlayout_read_pagelist,
 	.write_pagelist          = objlayout_write_pagelist,
-	.pg_test                 = objio_pg_test,
+	.pg_read_ops             = &objio_pg_read_ops,
+	.pg_write_ops            = &objio_pg_write_ops,
 
 	.free_deviceid_node	 = objio_free_deviceid_node,
 
@@ -1055,5 +1065,7 @@
 	       __func__);
 }
 
+MODULE_ALIAS("nfs-layouttype4-2");
+
 module_init(objlayout_init);
 module_exit(objlayout_exit);
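
With pg_test folded into per-direction nfs_pageio_ops tables, a layout
driver now registers complete read and write ops, and its pg_test can assume
pg_init has already attached a layout segment (which is why the NULL check
could be dropped from objio_pg_test above).  A minimal runnable sketch of a
size-capped pg_test in the same spirit, with simplified stand-in types:

	#include <stdbool.h>
	#include <stdio.h>

	struct page_req { unsigned int bytes; };
	struct pgio_desc {
		unsigned int count;		/* bytes coalesced so far */
		unsigned int max_io_size;	/* from the layout segment */
	};

	/* Accept a request only while the batch still fits in one layout I/O. */
	static bool pg_test_capped(const struct pgio_desc *d,
				   const struct page_req *r)
	{
		return d->count + r->bytes <= d->max_io_size;
	}

	int main(void)
	{
		struct pgio_desc d = { .count = 60 << 10, .max_io_size = 64 << 10 };
		struct page_req r = { .bytes = 8 << 10 };

		printf("%s\n", pg_test_capped(&d, &r) ? "coalesce" : "flush first");
		return 0;
	}
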
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18449f4..b60970c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -230,7 +230,7 @@
  */
 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     struct inode *inode,
-		     int (*doio)(struct nfs_pageio_descriptor *),
+		     const struct nfs_pageio_ops *pg_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -240,13 +240,12 @@
 	desc->pg_bsize = bsize;
 	desc->pg_base = 0;
 	desc->pg_moreio = 0;
+	desc->pg_recoalesce = 0;
 	desc->pg_inode = inode;
-	desc->pg_doio = doio;
+	desc->pg_ops = pg_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
-	desc->pg_test = nfs_generic_pg_test;
-	pnfs_pageio_init(desc, inode);
 }
 
 /**
@@ -276,7 +275,7 @@
 		return false;
 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
 		return false;
-	return pgio->pg_test(pgio, prev, req);
+	return pgio->pg_ops->pg_test(pgio, prev, req);
 }
 
 /**
@@ -297,6 +296,8 @@
 		if (!nfs_can_coalesce_requests(prev, req, desc))
 			return 0;
 	} else {
+		if (desc->pg_ops->pg_init)
+			desc->pg_ops->pg_init(desc, req);
 		desc->pg_base = req->wb_pgbase;
 	}
 	nfs_list_remove_request(req);
@@ -311,7 +312,7 @@
 static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
 {
 	if (!list_empty(&desc->pg_list)) {
-		int error = desc->pg_doio(desc);
+		int error = desc->pg_ops->pg_doio(desc);
 		if (error < 0)
 			desc->pg_error = error;
 		else
@@ -331,7 +332,7 @@
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
-int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			   struct nfs_page *req)
 {
 	while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@
 		if (desc->pg_error < 0)
 			return 0;
 		desc->pg_moreio = 0;
+		if (desc->pg_recoalesce)
+			return 0;
 	}
 	return 1;
 }
 
+static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+
+	do {
+		list_splice_init(&desc->pg_list, &head);
+		desc->pg_bytes_written -= desc->pg_count;
+		desc->pg_count = 0;
+		desc->pg_base = 0;
+		desc->pg_recoalesce = 0;
+
+		while (!list_empty(&head)) {
+			struct nfs_page *req;
+
+			req = list_first_entry(&head, struct nfs_page, wb_list);
+			nfs_list_remove_request(req);
+			if (__nfs_pageio_add_request(desc, req))
+				continue;
+			if (desc->pg_error < 0)
+				return 0;
+			break;
+		}
+	} while (desc->pg_recoalesce);
+	return 1;
+}
+
+int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+		struct nfs_page *req)
+{
+	int ret;
+
+	do {
+		ret = __nfs_pageio_add_request(desc, req);
+		if (ret)
+			break;
+		if (desc->pg_error < 0)
+			break;
+		ret = nfs_do_recoalesce(desc);
+	} while (ret);
+	return ret;
+}
+
 /**
  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
  * @desc: pointer to io descriptor
  */
 void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
 {
-	nfs_pageio_doio(desc);
+	for (;;) {
+		nfs_pageio_doio(desc);
+		if (!desc->pg_recoalesce)
+			break;
+		if (!nfs_do_recoalesce(desc))
+			break;
+	}
 }
 
 /**
@@ -369,7 +420,7 @@
 	if (!list_empty(&desc->pg_list)) {
 		struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
 		if (index != prev->wb_index + 1)
-			nfs_pageio_doio(desc);
+			nfs_pageio_complete(desc);
 	}
 }
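
The pg_recoalesce machinery is the subtle part of this hunk: when a pNFS
path falls back to the MDS mid-descriptor (changing pg_ops and pg_bsize),
the queued requests are spliced onto a private list and re-added one at a
time under the new parameters, and the drain repeats while another pass
keeps being requested.  A compressed runnable model of that loop, with
integers standing in for struct nfs_page and a fixed array for the list
heads:

	#include <stdio.h>

	#define MAXB 8

	struct desc {
		int batch[MAXB];
		int n;			/* requests in the current batch */
		int bsize;		/* max requests per batch */
		int recoalesce;
	};

	static void flush(struct desc *d)	/* models nfs_pageio_doio() */
	{
		printf("doio: %d reqs\n", d->n);
		d->n = 0;
	}

	static int add_req(struct desc *d, int req)
	{
		if (d->n >= d->bsize) {
			flush(d);
			if (d->recoalesce)
				return 0;	/* caller must recoalesce first */
		}
		d->batch[d->n++] = req;
		return 1;
	}

	static void recoalesce(struct desc *d)	/* models nfs_do_recoalesce() */
	{
		do {
			int head[MAXB], n = d->n, i;

			for (i = 0; i < n; i++)	/* splice pg_list to a local head */
				head[i] = d->batch[i];
			d->n = 0;
			d->recoalesce = 0;
			for (i = 0; i < n; i++)
				if (!add_req(d, head[i]))
					break;
		} while (d->recoalesce);
	}

	int main(void)
	{
		struct desc d = { .bsize = 4 };
		int req;

		for (req = 0; req < 6; req++)
			if (!add_req(&d, req))
				recoalesce(&d);
		d.bsize = 2;		/* e.g. fell back from pNFS to the MDS */
		d.recoalesce = 1;
		recoalesce(&d);
		flush(&d);		/* models nfs_pageio_complete() */
		return 0;
	}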
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 29c0ca7f..38e5508 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include "internal.h"
 #include "pnfs.h"
 #include "iostat.h"
@@ -448,11 +449,20 @@
 void
 pnfs_destroy_all_layouts(struct nfs_client *clp)
 {
+	struct nfs_server *server;
 	struct pnfs_layout_hdr *lo;
 	LIST_HEAD(tmp_list);
 
+	nfs4_deviceid_mark_client_invalid(clp);
+	nfs4_deviceid_purge_client(clp);
+
 	spin_lock(&clp->cl_lock);
-	list_splice_init(&clp->cl_layouts, &tmp_list);
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		if (!list_empty(&server->layouts))
+			list_splice_init(&server->layouts, &tmp_list);
+	}
+	rcu_read_unlock();
 	spin_unlock(&clp->cl_lock);
 
 	while (!list_empty(&tmp_list)) {
@@ -661,6 +671,7 @@
 	lrp->args.stateid = stateid;
 	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
 	lrp->args.inode = ino;
+	lrp->args.layout = lo;
 	lrp->clp = NFS_SERVER(ino)->nfs_client;
 
 	status = nfs4_proc_layoutreturn(lrp);
@@ -920,7 +931,8 @@
 	};
 	unsigned pg_offset;
 	struct nfs_inode *nfsi = NFS_I(ino);
-	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(ino);
+	struct nfs_client *clp = server->nfs_client;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg = NULL;
 	bool first = false;
@@ -964,7 +976,7 @@
 		 */
 		spin_lock(&clp->cl_lock);
 		BUG_ON(!list_empty(&lo->plh_layouts));
-		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+		list_add_tail(&lo->plh_layouts, &server->layouts);
 		spin_unlock(&clp->cl_lock);
 	}
 
@@ -973,7 +985,8 @@
 		arg.offset -= pg_offset;
 		arg.length += pg_offset;
 	}
-	arg.length = PAGE_CACHE_ALIGN(arg.length);
+	if (arg.length != NFS4_MAX_UINT64)
+		arg.length = PAGE_CACHE_ALIGN(arg.length);
 
 	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
 	if (!lseg && first) {
@@ -991,6 +1004,7 @@
 	spin_unlock(&ino->i_lock);
 	goto out;
 }
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
 
 int
 pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1062,71 @@
 	goto out;
 }
 
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+	BUG_ON(pgio->pg_lseg != NULL);
+
+	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+					   req->wb_context,
+					   req_offset(req),
+					   req->wb_bytes,
+					   IOMODE_READ,
+					   GFP_KERNEL);
+	/* If no lseg, fall back to read through mds */
+	if (pgio->pg_lseg == NULL)
+		nfs_pageio_reset_read_mds(pgio);
+
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+	BUG_ON(pgio->pg_lseg != NULL);
+
+	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+					   req->wb_context,
+					   req_offset(req),
+					   req->wb_bytes,
+					   IOMODE_RW,
+					   GFP_NOFS);
+	/* If no lseg, fall back to write through mds */
+	if (pgio->pg_lseg == NULL)
+		nfs_pageio_reset_write_mds(pgio);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
+bool
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+	if (ld == NULL)
+		return false;
+	nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+	return true;
+}
+
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+	if (ld == NULL)
+		return false;
+	nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+	return true;
+}
+
 bool
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
 {
-	enum pnfs_iomode access_type;
-	gfp_t gfp_flags;
-
-	/* We assume that pg_ioflags == 0 iff we're reading a page */
-	if (pgio->pg_ioflags == 0) {
-		access_type = IOMODE_READ;
-		gfp_flags = GFP_KERNEL;
-	} else {
-		access_type = IOMODE_RW;
-		gfp_flags = GFP_NOFS;
-	}
-
-	if (pgio->pg_lseg == NULL) {
-		if (pgio->pg_count != prev->wb_bytes)
-			return true;
-		/* This is first coelesce call for a series of nfs_pages */
-		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-						   prev->wb_context,
-						   req_offset(prev),
-						   pgio->pg_count,
-						   access_type,
-						   gfp_flags);
-		if (pgio->pg_lseg == NULL)
-			return true;
-	}
+	if (pgio->pg_lseg == NULL)
+		return nfs_generic_pg_test(pgio, prev, req);
 
 	/*
 	 * Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1170,30 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+		struct nfs_write_data *data)
+{
+	list_splice_tail_init(&data->pages, &desc->pg_list);
+	if (data->req && list_empty(&data->req->wb_list))
+		nfs_list_add_request(data->req, &desc->pg_list);
+	nfs_pageio_reset_write_mds(desc);
+	desc->pg_recoalesce = 1;
+	nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
 pnfs_try_to_write_data(struct nfs_write_data *wdata,
-			const struct rpc_call_ops *call_ops, int how)
+			const struct rpc_call_ops *call_ops,
+			struct pnfs_layout_segment *lseg,
+			int how)
 {
 	struct inode *inode = wdata->inode;
 	enum pnfs_try_status trypnfs;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
 	wdata->mds_ops = call_ops;
+	wdata->lseg = get_lseg(lseg);
 
 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
 		inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1209,44 @@
 	return trypnfs;
 }
 
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+	struct nfs_write_data *data;
+	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+	desc->pg_lseg = NULL;
+	while (!list_empty(head)) {
+		enum pnfs_try_status trypnfs;
+
+		data = list_entry(head->next, struct nfs_write_data, list);
+		list_del_init(&data->list);
+
+		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+		if (trypnfs == PNFS_NOT_ATTEMPTED)
+			pnfs_write_through_mds(desc, data);
+	}
+	put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+	int ret;
+
+	ret = nfs_generic_flush(desc, &head);
+	if (ret != 0) {
+		put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+		return ret;
+	}
+	pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1167,18 +1270,32 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+		struct nfs_read_data *data)
+{
+	list_splice_tail_init(&data->pages, &desc->pg_list);
+	if (data->req && list_empty(&data->req->wb_list))
+		nfs_list_add_request(data->req, &desc->pg_list);
+	nfs_pageio_reset_read_mds(desc);
+	desc->pg_recoalesce = 1;
+	nfs_readdata_release(data);
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
-enum pnfs_try_status
+static enum pnfs_try_status
 pnfs_try_to_read_data(struct nfs_read_data *rdata,
-		       const struct rpc_call_ops *call_ops)
+		       const struct rpc_call_ops *call_ops,
+		       struct pnfs_layout_segment *lseg)
 {
 	struct inode *inode = rdata->inode;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 	enum pnfs_try_status trypnfs;
 
 	rdata->mds_ops = call_ops;
+	rdata->lseg = get_lseg(lseg);
 
 	dprintk("%s: Reading ino:%lu %u@%llu\n",
 		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,6 +1311,44 @@
 	return trypnfs;
 }
 
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+	struct nfs_read_data *data;
+	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+	desc->pg_lseg = NULL;
+	while (!list_empty(head)) {
+		enum pnfs_try_status trypnfs;
+
+		data = list_entry(head->next, struct nfs_read_data, list);
+		list_del_init(&data->list);
+
+		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+		if (trypnfs == PNFS_NOT_ATTEMPTED)
+			pnfs_read_through_mds(desc, data);
+	}
+	put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+	int ret;
+
+	ret = nfs_generic_pagein(desc, &head);
+	if (ret != 0) {
+		put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+		return ret;
+	}
+	pnfs_do_multiple_reads(desc, &head);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
 /*
  * Currently there is only one (whole file) write lseg.
  */
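
pnfs_do_multiple_reads()/pnfs_do_multiple_writes() above encode the new
fallback contract: every prepared RPC is offered to the layout driver first,
and a PNFS_NOT_ATTEMPTED answer reroutes just that request through the MDS
(the pnfs_*_through_mds() helpers also set pg_recoalesce so the descriptor
rebuilds under MDS parameters).  A toy illustration of the dispatch loop,
with a pretend driver that declines odd-numbered requests:

	#include <stdio.h>

	enum pnfs_try_status { PNFS_ATTEMPTED, PNFS_NOT_ATTEMPTED };

	static enum pnfs_try_status try_layout_io(int req)
	{
		/* stand-in for the driver's read/write_pagelist methods */
		return req % 2 ? PNFS_NOT_ATTEMPTED : PNFS_ATTEMPTED;
	}

	int main(void)
	{
		int req;

		for (req = 0; req < 4; req++)
			printf("req %d: %s\n", req,
			       try_layout_io(req) == PNFS_ATTEMPTED ?
			       "handled by the layout driver" :
			       "resent through the MDS");
		return 0;
	}
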
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96bf4e6..078670d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -87,7 +87,8 @@
 	void (*free_lseg) (struct pnfs_layout_segment *lseg);
 
 	/* test for nfs page cache coalescing */
-	bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	const struct nfs_pageio_ops *pg_read_ops;
+	const struct nfs_pageio_ops *pg_write_ops;
 
 	/* Returns true if layoutdriver wants to divert this request to
 	 * driver's commit routine.
@@ -148,16 +149,16 @@
 /* pnfs.c */
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
-struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-		   loff_t pos, u64 count, enum pnfs_iomode access_type,
-		   gfp_t gfp_flags);
+
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
-enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
-					     const struct rpc_call_ops *, int);
-enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
-					    const struct rpc_call_ops *);
+void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
+void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
 bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -182,6 +183,19 @@
 int _pnfs_return_layout(struct inode *);
 int pnfs_ld_write_done(struct nfs_write_data *);
 int pnfs_ld_read_done(struct nfs_read_data *);
+struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
+					       struct nfs_open_context *ctx,
+					       loff_t pos,
+					       u64 count,
+					       enum pnfs_iomode iomode,
+					       gfp_t gfp_flags);
+
+void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
+
+/* nfs4_deviceid_flags */
+enum {
+	NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
+};
 
 /* pnfs_dev.c */
 struct nfs4_deviceid_node {
@@ -189,13 +203,13 @@
 	struct hlist_node		tmpnode;
 	const struct pnfs_layoutdriver_type *ld;
 	const struct nfs_client		*nfs_client;
+	unsigned long			flags;
 	struct nfs4_deviceid		deviceid;
 	atomic_t			ref;
 };
 
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
-struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
 			     const struct pnfs_layoutdriver_type *,
@@ -293,15 +307,6 @@
 	return 0;
 }
 
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
-				    struct inode *inode)
-{
-	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-
-	if (ld)
-		pgio->pg_test = ld->pg_test;
-}
-
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -322,28 +327,6 @@
 {
 }
 
-static inline struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-		   loff_t pos, u64 count, enum pnfs_iomode access_type,
-		   gfp_t gfp_flags)
-{
-	return NULL;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *data,
-		      const struct rpc_call_ops *call_ops)
-{
-	return PNFS_NOT_ATTEMPTED;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *data,
-		       const struct rpc_call_ops *call_ops, int how)
-{
-	return PNFS_NOT_ATTEMPTED;
-}
-
 static inline int pnfs_return_layout(struct inode *ino)
 {
 	return 0;
@@ -385,9 +368,14 @@
 {
 }
 
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
-				    struct inode *inode)
+static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
+	return false;
+}
+
+static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+	return false;
 }
 
 static inline void
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index f0f8e1e..6fda522 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -100,8 +100,8 @@
 
 	rcu_read_lock();
 	d = _lookup_deviceid(ld, clp, id, hash);
-	if (d && !atomic_inc_not_zero(&d->ref))
-		d = NULL;
+	if (d != NULL)
+		atomic_inc(&d->ref);
 	rcu_read_unlock();
 	return d;
 }
@@ -115,15 +115,15 @@
 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
 
 /*
- * Unhash and put deviceid
+ * Remove a deviceid from cache
  *
  * @clp nfs_client associated with deviceid
  * @id the deviceid to unhash
  *
  * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
  */
-struct nfs4_deviceid_node *
-nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
+void
+nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
 			 const struct nfs_client *clp, const struct nfs4_deviceid *id)
 {
 	struct nfs4_deviceid_node *d;
@@ -134,7 +134,7 @@
 	rcu_read_unlock();
 	if (!d) {
 		spin_unlock(&nfs4_deviceid_lock);
-		return NULL;
+		return;
 	}
 	hlist_del_init_rcu(&d->node);
 	spin_unlock(&nfs4_deviceid_lock);
@@ -142,28 +142,7 @@
 
 	/* balance the initial ref set in pnfs_insert_deviceid */
 	if (atomic_dec_and_test(&d->ref))
-		return d;
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
-
-/*
- * Delete a deviceid from cache
- *
- * @clp struct nfs_client qualifying the deviceid
- * @id deviceid to delete
- */
-void
-nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
-		     const struct nfs_client *clp, const struct nfs4_deviceid *id)
-{
-	struct nfs4_deviceid_node *d;
-
-	d = nfs4_unhash_put_deviceid(ld, clp, id);
-	if (!d)
-		return;
-	d->ld->free_deviceid_node(d);
+		d->ld->free_deviceid_node(d);
 }
 EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
 
@@ -177,6 +156,7 @@
 	INIT_HLIST_NODE(&d->tmpnode);
 	d->ld = ld;
 	d->nfs_client = nfs_client;
+	d->flags = 0;
 	d->deviceid = *id;
 	atomic_set(&d->ref, 1);
 }
@@ -221,16 +201,15 @@
  *
  * @d deviceid node to put
  *
- * @ret true iff the node was deleted
+ * return true iff the node was deleted
+ * Note that the test for d->ref == 0 is sufficient to establish
+ * that the node is no longer hashed in the global device id cache.
  */
 bool
 nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 {
-	if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
+	if (!atomic_dec_and_test(&d->ref))
 		return false;
-	hlist_del_init_rcu(&d->node);
-	spin_unlock(&nfs4_deviceid_lock);
-	synchronize_rcu();
 	d->ld->free_deviceid_node(d);
 	return true;
 }
@@ -275,3 +254,22 @@
 	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
 		_deviceid_purge_client(clp, h);
 }
+
+/*
+ * Stop use of all deviceids associated with an nfs_client
+ */
+void
+nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
+{
+	struct nfs4_deviceid_node *d;
+	struct hlist_node *n;
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+			if (d->nfs_client == clp)
+				set_bit(NFS_DEVICEID_INVALID, &d->flags);
+	}
+	rcu_read_unlock();
+}
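
The deviceid rework above simplifies the lifetime rules: the hash table owns
one reference from insertion, lookups can use a plain atomic_inc() because a
hashed node always has ref >= 1, and whichever path drops the count to zero
(nfs4_delete_deviceid() dropping the table's reference, or the final
nfs4_put_deviceid_node()) frees the node, since ref == 0 now implies the
node is already unhashed.  A single-threaded sketch of that ownership scheme
(a plain int instead of atomic_t, no RCU; illustration only):

	#include <stdio.h>
	#include <stdlib.h>

	struct node { int ref; };

	static struct node *node_get(struct node *n)
	{
		n->ref++;	/* safe: a hashed node holds ref >= 1 */
		return n;
	}

	static void node_put(struct node *n)
	{
		if (--n->ref == 0) {	/* zero implies already unhashed */
			printf("freeing node\n");
			free(n);
		}
	}

	int main(void)
	{
		struct node *n = malloc(sizeof(*n));

		if (!n)
			return 1;
		n->ref = 1;	/* initial reference owned by the hash table */
		node_get(n);	/* a lookup takes its own reference */
		node_put(n);	/* lookup done */
		node_put(n);	/* delete: drop the table's reference, frees */
		return 0;
	}
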
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a68679f..2171c04 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
+static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
@@ -68,7 +67,7 @@
 	mempool_free(p, nfs_rdata_mempool);
 }
 
-static void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_read_data *rdata)
 {
 	put_lseg(rdata->lseg);
 	put_nfs_open_context(rdata->args.context);
@@ -113,6 +112,27 @@
 	}
 }
 
+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+		struct inode *inode)
+{
+	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+			NFS_SERVER(inode)->rsize, 0);
+}
+
+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+	pgio->pg_ops = &nfs_pageio_read_ops;
+	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+
+static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+		struct inode *inode)
+{
+	if (!pnfs_pageio_init_read(pgio, inode))
+		nfs_pageio_init_read_mds(pgio, inode);
+}
+
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 		       struct page *page)
 {
@@ -131,14 +151,9 @@
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-	nfs_pageio_init(&pgio, inode, NULL, 0, 0);
-	nfs_list_add_request(new, &pgio.pg_list);
-	pgio.pg_count = len;
-
-	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
-		nfs_pagein_multi(&pgio);
-	else
-		nfs_pagein_one(&pgio);
+	nfs_pageio_init_read(&pgio, inode);
+	nfs_pageio_add_request(&pgio, new);
+	nfs_pageio_complete(&pgio);
 	return 0;
 }
 
@@ -202,17 +217,14 @@
 /*
  * Set up the NFS read request struct
  */
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
-		const struct rpc_call_ops *call_ops,
-		unsigned int count, unsigned int offset,
-		struct pnfs_layout_segment *lseg)
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+		unsigned int count, unsigned int offset)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 
 	data->req	  = req;
 	data->inode	  = inode;
 	data->cred	  = req->wb_context->cred;
-	data->lseg	  = get_lseg(lseg);
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -226,14 +238,36 @@
 	data->res.count   = count;
 	data->res.eof     = 0;
 	nfs_fattr_init(&data->fattr);
+}
 
-	if (data->lseg &&
-	    (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
-		return 0;
+static int nfs_do_read(struct nfs_read_data *data,
+		const struct rpc_call_ops *call_ops)
+{
+	struct inode *inode = data->args.context->dentry->d_inode;
 
 	return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
 }
 
+static int
+nfs_do_multiple_reads(struct list_head *head,
+		const struct rpc_call_ops *call_ops)
+{
+	struct nfs_read_data *data;
+	int ret = 0;
+
+	while (!list_empty(head)) {
+		int ret2;
+
+		data = list_entry(head->next, struct nfs_read_data, list);
+		list_del_init(&data->list);
+
+		ret2 = nfs_do_read(data, call_ops);
+		if (ret == 0)
+			ret = ret2;
+	}
+	return ret;
+}
+
 static void
 nfs_async_read_error(struct list_head *head)
 {
@@ -260,20 +294,19 @@
  * won't see the new data until our attribute cache is updated.  This is more
  * or less conventional NFS client behavior.
  */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
 	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 	struct page *page = req->wb_page;
 	struct nfs_read_data *data;
-	size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
+	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
 	int requests = 0;
 	int ret = 0;
-	struct pnfs_layout_segment *lseg;
-	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
 
+	offset = 0;
 	nbytes = desc->pg_count;
 	do {
 		size_t len = min(nbytes,rsize);
@@ -281,45 +314,21 @@
 		data = nfs_readdata_alloc(1);
 		if (!data)
 			goto out_bad;
-		list_add(&data->pages, &list);
+		data->pagevec[0] = page;
+		nfs_read_rpcsetup(req, data, len, offset);
+		list_add(&data->list, res);
 		requests++;
 		nbytes -= len;
+		offset += len;
 	} while(nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
-
-	BUG_ON(desc->pg_lseg != NULL);
-	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-				  req_offset(req), desc->pg_count,
-				  IOMODE_READ, GFP_KERNEL);
 	ClearPageError(page);
-	offset = 0;
-	nbytes = desc->pg_count;
-	do {
-		int ret2;
-
-		data = list_entry(list.next, struct nfs_read_data, pages);
-		list_del_init(&data->pages);
-
-		data->pagevec[0] = page;
-
-		if (nbytes < rsize)
-			rsize = nbytes;
-		ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
-					 rsize, offset, lseg);
-		if (ret == 0)
-			ret = ret2;
-		offset += rsize;
-		nbytes -= rsize;
-	} while (nbytes != 0);
-	put_lseg(lseg);
-	desc->pg_lseg = NULL;
-
+	desc->pg_rpc_callops = &nfs_read_partial_ops;
 	return ret;
-
 out_bad:
-	while (!list_empty(&list)) {
-		data = list_entry(list.next, struct nfs_read_data, pages);
-		list_del(&data->pages);
+	while (!list_empty(res)) {
+		data = list_entry(res->next, struct nfs_read_data, list);
+		list_del(&data->list);
 		nfs_readdata_free(data);
 	}
 	SetPageError(page);
@@ -327,19 +336,19 @@
 	return -ENOMEM;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
 	struct nfs_read_data	*data;
 	struct list_head *head = &desc->pg_list;
-	struct pnfs_layout_segment *lseg = desc->pg_lseg;
-	int ret = -ENOMEM;
+	int ret = 0;
 
 	data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
 						     desc->pg_count));
 	if (!data) {
 		nfs_async_read_error(head);
+		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -352,19 +361,37 @@
 		*pages++ = req->wb_page;
 	}
 	req = nfs_list_entry(data->pages.next);
-	if ((!lseg) && list_is_singular(&data->pages))
-		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-					  req_offset(req), desc->pg_count,
-					  IOMODE_READ, GFP_KERNEL);
 
-	ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
-				0, lseg);
+	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
+	list_add(&data->list, res);
+	desc->pg_rpc_callops = &nfs_read_full_ops;
 out:
-	put_lseg(lseg);
-	desc->pg_lseg = NULL;
 	return ret;
 }
 
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+	if (desc->pg_bsize < PAGE_CACHE_SIZE)
+		return nfs_pagein_multi(desc, head);
+	return nfs_pagein_one(desc, head);
+}
+
+static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+	int ret;
+
+	ret = nfs_generic_pagein(desc, &head);
+	if (ret == 0)
+		ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+	return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_read_ops = {
+	.pg_test = nfs_generic_pg_test,
+	.pg_doio = nfs_generic_pg_readpages,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@
 		.pgio = &pgio,
 	};
 	struct inode *inode = mapping->host;
-	struct nfs_server *server = NFS_SERVER(inode);
-	size_t rsize = server->rsize;
 	unsigned long npages;
 	int ret = -ESTALE;
 
@@ -664,10 +689,7 @@
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	if (rsize < PAGE_CACHE_SIZE)
-		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
-	else
-		nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+	nfs_pageio_init_read(&pgio, inode);
 
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
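
nfs_pagein_multi() now plans its sub-page requests in a single pass: each
iteration claims min(nbytes, rsize) bytes at the running offset, so a
4096-byte request with rsize = 1024 becomes four RPCs at offsets 0, 1024,
2048 and 3072.  The arithmetic in isolation:

	#include <stdio.h>

	int main(void)
	{
		unsigned int rsize = 1024, nbytes = 4096, offset = 0;
		int requests = 0;

		do {
			unsigned int len = nbytes < rsize ? nbytes : rsize;

			printf("read %u@%u\n", len, offset);
			requests++;
			nbytes -= len;
			offset += len;
		} while (nbytes != 0);
		printf("%d requests\n", requests);
		return 0;
	}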
 
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8d6864c..b2fbbde 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -147,7 +147,7 @@
 
 	alias = d_lookup(parent, &data->args.name);
 	if (alias != NULL) {
-		int ret = 0;
+		int ret;
 		void *devname_garbage = NULL;
 
 		/*
@@ -155,14 +155,16 @@
 		 * the sillyrename information to the aliased dentry.
 		 */
 		nfs_free_dname(data);
+		ret = nfs_copy_dname(alias, data);
 		spin_lock(&alias->d_lock);
-		if (alias->d_inode != NULL &&
+		if (ret == 0 && alias->d_inode != NULL &&
 		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
 			devname_garbage = alias->d_fsdata;
 			alias->d_fsdata = data;
 			alias->d_flags |= DCACHE_NFSFS_RENAMED;
 			ret = 1;
-		}
+		} else
+			ret = 0;
 		spin_unlock(&alias->d_lock);
 		nfs_dec_sillycount(dir);
 		dput(alias);
@@ -171,8 +173,7 @@
 		 * point dentry is definitely not a root, so we won't need
 		 * that anymore.
 		 */
-		if (devname_garbage)
-			kfree(devname_garbage);
+		kfree(devname_garbage);
 		return ret;
 	}
 	data->dir = igrab(dir);
@@ -204,8 +205,6 @@
 	if (parent == NULL)
 		goto out_free;
 	dir = parent->d_inode;
-	if (nfs_copy_dname(dentry, data) != 0)
-		goto out_dput;
 	/* Non-exclusive lock protects against concurrent lookup() calls */
 	spin_lock(&dir->i_lock);
 	if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -366,6 +365,8 @@
 	struct nfs_renamedata *data = calldata;
 	struct inode *old_dir = data->old_dir;
 	struct inode *new_dir = data->new_dir;
+	struct dentry *old_dentry = data->old_dentry;
+	struct dentry *new_dentry = data->new_dentry;
 
 	if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
 		nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
@@ -373,12 +374,12 @@
 	}
 
 	if (task->tk_status != 0) {
-		nfs_cancel_async_unlink(data->old_dentry);
+		nfs_cancel_async_unlink(old_dentry);
 		return;
 	}
 
-	nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
-	d_move(data->old_dentry, data->new_dentry);
+	d_drop(old_dentry);
+	d_drop(new_dentry);
 }
 
 /**
@@ -501,6 +502,14 @@
  * and only performs the unlink once the last reference to it is put.
  *
  * The final cleanup is done during dentry_iput.
+ *
+ * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
+ * could take responsibility for keeping open files referenced.  The server
+ * would also need to ensure that opened-but-deleted files were kept over
+ * reboots.  However, we may not assume a server does so.  (RFC 5661
+ * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
+ * use to advertise that it does this; some day we may take advantage of
+ * it.))
  */
 int
 nfs_sillyrename(struct inode *dir, struct dentry *dentry)
@@ -560,6 +569,14 @@
 	if (error)
 		goto out_dput;
 
+	/* populate unlinkdata with the right dname */
+	error = nfs_copy_dname(sdentry,
+				(struct nfs_unlinkdata *)dentry->d_fsdata);
+	if (error) {
+		nfs_cancel_async_unlink(dentry);
+		goto out_dput;
+	}
+
 	/* run the rename task, undo unlink if it fails */
 	task = nfs_async_rename(dir, dir, dentry, sdentry);
 	if (IS_ERR(task)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 00e3750..b39b37f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@
 	mempool_free(p, nfs_wdata_mempool);
 }
 
-static void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_write_data *wdata)
 {
 	put_lseg(wdata->lseg);
 	put_nfs_open_context(wdata->args.context);
@@ -845,11 +845,9 @@
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static int nfs_write_rpcsetup(struct nfs_page *req,
+static void nfs_write_rpcsetup(struct nfs_page *req,
 		struct nfs_write_data *data,
-		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset,
-		struct pnfs_layout_segment *lseg,
 		int how)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
@@ -860,7 +858,6 @@
 	data->req = req;
 	data->inode = inode = req->wb_context->dentry->d_inode;
 	data->cred = req->wb_context->cred;
-	data->lseg = get_lseg(lseg);
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -872,24 +869,51 @@
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
 	data->args.stable  = NFS_UNSTABLE;
-	if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
-		data->args.stable = NFS_DATA_SYNC;
-		if (!nfs_need_commit(NFS_I(inode)))
-			data->args.stable = NFS_FILE_SYNC;
+	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+	case 0:
+		break;
+	case FLUSH_COND_STABLE:
+		if (nfs_need_commit(NFS_I(inode)))
+			break;
+	default:
+		data->args.stable = NFS_FILE_SYNC;
 	}
 
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
+}
 
-	if (data->lseg &&
-	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
-		return 0;
+static int nfs_do_write(struct nfs_write_data *data,
+		const struct rpc_call_ops *call_ops,
+		int how)
+{
+	struct inode *inode = data->args.context->dentry->d_inode;
 
 	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
 }
 
+static int nfs_do_multiple_writes(struct list_head *head,
+		const struct rpc_call_ops *call_ops,
+		int how)
+{
+	struct nfs_write_data *data;
+	int ret = 0;
+
+	while (!list_empty(head)) {
+		int ret2;
+
+		data = list_entry(head->next, struct nfs_write_data, list);
+		list_del_init(&data->list);
+
+		ret2 = nfs_do_write(data, call_ops, how);
+		if (ret == 0)
+			ret = ret2;
+	}
+	return ret;
+}
+
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
  * call this on each, which will prepare them to be retried on next
  * writeback using standard nfs.
@@ -907,17 +931,15 @@
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
 	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 	struct page *page = req->wb_page;
 	struct nfs_write_data *data;
-	size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
+	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
 	int requests = 0;
 	int ret = 0;
-	struct pnfs_layout_segment *lseg;
-	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
 
@@ -927,6 +949,7 @@
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 
+	offset = 0;
 	nbytes = desc->pg_count;
 	do {
 		size_t len = min(nbytes, wsize);
@@ -934,45 +957,21 @@
 		data = nfs_writedata_alloc(1);
 		if (!data)
 			goto out_bad;
-		list_add(&data->pages, &list);
+		data->pagevec[0] = page;
+		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
+		list_add(&data->list, res);
 		requests++;
 		nbytes -= len;
+		offset += len;
 	} while (nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
-
-	BUG_ON(desc->pg_lseg);
-	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-				  req_offset(req), desc->pg_count,
-				  IOMODE_RW, GFP_NOFS);
-	ClearPageError(page);
-	offset = 0;
-	nbytes = desc->pg_count;
-	do {
-		int ret2;
-
-		data = list_entry(list.next, struct nfs_write_data, pages);
-		list_del_init(&data->pages);
-
-		data->pagevec[0] = page;
-
-		if (nbytes < wsize)
-			wsize = nbytes;
-		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
-					  wsize, offset, lseg, desc->pg_ioflags);
-		if (ret == 0)
-			ret = ret2;
-		offset += wsize;
-		nbytes -= wsize;
-	} while (nbytes != 0);
-
-	put_lseg(lseg);
-	desc->pg_lseg = NULL;
+	desc->pg_rpc_callops = &nfs_write_partial_ops;
 	return ret;
 
 out_bad:
-	while (!list_empty(&list)) {
-		data = list_entry(list.next, struct nfs_write_data, pages);
-		list_del(&data->pages);
+	while (!list_empty(res)) {
+		data = list_entry(res->next, struct nfs_write_data, list);
+		list_del(&data->list);
 		nfs_writedata_free(data);
 	}
 	nfs_redirty_request(req);
@@ -987,14 +986,13 @@
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
-	struct pnfs_layout_segment *lseg = desc->pg_lseg;
-	int ret;
+	int ret = 0;
 
 	data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
 						      desc->pg_count));
@@ -1016,32 +1014,62 @@
 		*pages++ = req->wb_page;
 	}
 	req = nfs_list_entry(data->pages.next);
-	if ((!lseg) && list_is_singular(&data->pages))
-		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-					  req_offset(req), desc->pg_count,
-					  IOMODE_RW, GFP_NOFS);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 	/* Set up the argument struct */
-	ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+	nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
+	list_add(&data->list, res);
+	desc->pg_rpc_callops = &nfs_write_full_ops;
 out:
-	put_lseg(lseg); /* Cleans any gotten in ->pg_test */
-	desc->pg_lseg = NULL;
 	return ret;
 }
 
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+	if (desc->pg_bsize < PAGE_CACHE_SIZE)
+		return nfs_flush_multi(desc, head);
+	return nfs_flush_one(desc, head);
+}
+
+static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+	LIST_HEAD(head);
+	int ret;
+
+	ret = nfs_generic_flush(desc, &head);
+	if (ret == 0)
+		ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
+				desc->pg_ioflags);
+	return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_write_ops = {
+	.pg_test = nfs_generic_pg_test,
+	.pg_doio = nfs_generic_pg_writepages,
+};
+
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+				  struct inode *inode, int ioflags)
+{
+	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+				NFS_SERVER(inode)->wsize, ioflags);
+}
+
+void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
+{
+	pgio->pg_ops = &nfs_pageio_write_ops;
+	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
+
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags)
 {
-	size_t wsize = NFS_SERVER(inode)->wsize;
-
-	if (wsize < PAGE_CACHE_SIZE)
-		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
-	else
-		nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
+	if (!pnfs_pageio_init_write(pgio, inode, ioflags))
+		nfs_pageio_init_write_mds(pgio, inode, ioflags);
 }
 
 /*
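
The rewritten stable-write selection in nfs_write_rpcsetup() is easiest to
read as a three-way decision: no flush flags leaves the write UNSTABLE, a
FLUSH_COND_STABLE flush stays UNSTABLE only when a commit is already due,
and anything else (including FLUSH_STABLE) is sent FILE_SYNC.  A runnable
restatement, with local stand-in flag values rather than the kernel's:

	#include <stdio.h>

	enum stable_how { UNSTABLE, FILE_SYNC };
	#define FLUSH_STABLE		1	/* stand-in values */
	#define FLUSH_COND_STABLE	2

	static enum stable_how pick_stable(int how, int commit_pending)
	{
		switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
		case 0:
			return UNSTABLE;
		case FLUSH_COND_STABLE:
			if (commit_pending)
				return UNSTABLE;
			/* fall through */
		default:
			return FILE_SYNC;
		}
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       pick_stable(0, 0),			/* 0: UNSTABLE */
		       pick_stable(FLUSH_COND_STABLE, 1),	/* 0: commit follows */
		       pick_stable(FLUSH_COND_STABLE, 0));	/* 1: FILE_SYNC */
		return 0;
	}
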
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 3b8d397..98e5442 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -93,7 +93,7 @@
 
 	memset(bh->b_data, 0, sizeof(struct omfs_inode));
 
-	if (inode->i_mode & S_IFDIR) {
+	if (S_ISDIR(inode->i_mode)) {
 		memset(&bh->b_data[OMFS_DIR_START], 0xff,
 			sbi->s_sys_blocksize - OMFS_DIR_START);
 	} else
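
The S_ISDIR()/S_ISREG() conversions here and in the XFS hunks below are more
than cosmetic: the file type lives in the S_IFMT field as an enumeration,
not as independent flag bits, so a test like mode & S_IFDIR also matches
block devices (S_IFBLK, 0060000) and sockets (S_IFSOCK, 0140000), both of
which contain the 0040000 bit.  A runnable userspace demonstration:

	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		mode_t sock = S_IFSOCK, blk = S_IFBLK;

		printf("sock & S_IFDIR: %d  S_ISDIR(sock): %d\n",
		       (sock & S_IFDIR) != 0, S_ISDIR(sock) != 0);	/* 1 0 */
		printf("blk  & S_IFDIR: %d  S_ISDIR(blk):  %d\n",
		       (blk & S_IFDIR) != 0, S_ISDIR(blk) != 0);	/* 1 0 */
		return 0;
	}
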
diff --git a/fs/open.c b/fs/open.c
index 739b751..f711921 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -446,74 +446,52 @@
 	return error;
 }
 
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+static int chmod_common(struct path *path, umode_t mode)
 {
-	struct inode * inode;
-	struct dentry * dentry;
-	struct file * file;
-	int err = -EBADF;
+	struct inode *inode = path->dentry->d_inode;
 	struct iattr newattrs;
+	int error;
 
-	file = fget(fd);
-	if (!file)
-		goto out;
-
-	dentry = file->f_path.dentry;
-	inode = dentry->d_inode;
-
-	audit_inode(NULL, dentry);
-
-	err = mnt_want_write_file(file);
-	if (err)
-		goto out_putf;
+	error = mnt_want_write(path->mnt);
+	if (error)
+		return error;
 	mutex_lock(&inode->i_mutex);
-	err = security_path_chmod(dentry, file->f_vfsmnt, mode);
-	if (err)
+	error = security_path_chmod(path->dentry, path->mnt, mode);
+	if (error)
 		goto out_unlock;
-	if (mode == (mode_t) -1)
-		mode = inode->i_mode;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	err = notify_change(dentry, &newattrs);
+	error = notify_change(path->dentry, &newattrs);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
-	mnt_drop_write(file->f_path.mnt);
-out_putf:
-	fput(file);
-out:
+	mnt_drop_write(path->mnt);
+	return error;
+}
+
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+{
+	struct file * file;
+	int err = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		audit_inode(NULL, file->f_path.dentry);
+		err = chmod_common(&file->f_path, mode);
+		fput(file);
+	}
 	return err;
 }
 
 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 {
 	struct path path;
-	struct inode *inode;
 	int error;
-	struct iattr newattrs;
 
 	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
-	if (error)
-		goto out;
-	inode = path.dentry->d_inode;
-
-	error = mnt_want_write(path.mnt);
-	if (error)
-		goto dput_and_out;
-	mutex_lock(&inode->i_mutex);
-	error = security_path_chmod(path.dentry, path.mnt, mode);
-	if (error)
-		goto out_unlock;
-	if (mode == (mode_t) -1)
-		mode = inode->i_mode;
-	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
-	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(path.dentry, &newattrs);
-out_unlock:
-	mutex_unlock(&inode->i_mutex);
-	mnt_drop_write(path.mnt);
-dput_and_out:
-	path_put(&path);
-out:
+	if (!error) {
+		error = chmod_common(&path, mode);
+		path_put(&path);
+	}
 	return error;
 }
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 1b7f9af..0e0be1d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -948,7 +948,7 @@
 
 static struct inode * get_pipe_inode(void)
 {
-	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
+	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
 	struct pipe_inode_info *pipe;
 
 	if (!inode)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1637f1..9d99131 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -620,8 +620,7 @@
 	if (!ent) goto out;
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
-	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
-	ent->name = ((char *) ent) + sizeof(*ent);
+	memcpy(ent->name, fn, len + 1);
 	ent->namelen = len;
 	ent->mode = mode;
 	ent->nlink = nlink;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 9020ac1..f738024 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -197,15 +197,15 @@
 	int err;
 
 	err = -ENOMEM;
-	netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+	netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
 	if (!netd)
 		goto out;
 
 	netd->data = net;
 	netd->nlink = 2;
-	netd->name = "net";
 	netd->namelen = 3;
 	netd->parent = &proc_root;
+	memcpy(netd->name, "net", 4);
 
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index d6c3b41..9a8a2b7 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -186,13 +186,13 @@
 struct proc_dir_entry proc_root = {
 	.low_ino	= PROC_ROOT_INO, 
 	.namelen	= 5, 
-	.name		= "/proc",
 	.mode		= S_IFDIR | S_IRUGO | S_IXUGO, 
 	.nlink		= 2, 
 	.count		= ATOMIC_INIT(1),
 	.proc_iops	= &proc_root_inode_operations, 
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
+	.name		= "/proc",
 };
 
 int pid_ns_prepare_proc(struct pid_namespace *ns)
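
The three /proc hunks are one change seen from different angles:
proc_dir_entry's name field becomes storage at the tail of the structure
instead of a pointer, so proc/generic.c copies straight into ent->name,
proc_net_init() must size its kzalloc() for "net" plus the NUL, and the
static proc_root initializer has to set .name last.  This is the standard
flexible-array-member pattern, sketched standalone:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct entry {
		int namelen;
		char name[];		/* must be the final member */
	};

	static struct entry *entry_new(const char *name)
	{
		size_t len = strlen(name);
		struct entry *e = calloc(1, sizeof(*e) + len + 1);

		if (!e)
			return NULL;
		memcpy(e->name, name, len + 1);	/* one allocation, one free */
		e->namelen = (int)len;
		return e;
	}

	int main(void)
	{
		struct entry *e = entry_new("net");

		if (!e)
			return 1;
		printf("%s (%d)\n", e->name, e->namelen);
		free(e);
		return 0;
	}
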
diff --git a/fs/read_write.c b/fs/read_write.c
index 5907b49..179f1c3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -166,8 +166,10 @@
 			 * long as offset isn't at the end of the file then the
 			 * offset is data.
 			 */
-			if (offset >= inode->i_size)
-				return -ENXIO;
+			if (offset >= inode->i_size) {
+				retval = -ENXIO;
+				goto out;
+			}
 			break;
 		case SEEK_HOLE:
 			/*
@@ -175,8 +177,10 @@
 			 * as long as offset isn't i_size or larger, return
 			 * i_size.
 			 */
-			if (offset >= inode->i_size)
-				return -ENXIO;
+			if (offset >= inode->i_size) {
+				retval = -ENXIO;
+				goto out;
+			}
 			offset = inode->i_size;
 			break;
 	}
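
The SEEK_DATA/SEEK_HOLE fix routes -ENXIO through the function's common exit
label instead of returning early past the unwind code at out:.  The
user-visible contract is unchanged: an offset at or beyond i_size fails with
ENXIO, and a fully-written file still reports an implicit hole at EOF.  Seen
from userspace (assuming a kernel, filesystem and libc that support these
whence values):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("demo.txt", O_RDWR | O_CREAT | O_TRUNC, 0644);
		off_t hole;

		if (fd < 0 || write(fd, "hello", 5) != 5)
			return 1;
		hole = lseek(fd, 0, SEEK_HOLE);		/* 5: implicit hole at EOF */
		printf("first hole at %lld\n", (long long)hole);
		if (lseek(fd, 5, SEEK_DATA) < 0)	/* offset == file size */
			perror("SEEK_DATA at EOF");	/* ENXIO */
		close(fd);
		return 0;
	}
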
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b2b4119..d1fe745 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1224,6 +1224,9 @@
 		rw = READ;
 	}
 
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
 	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 825390e..7f7b424 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -149,7 +149,9 @@
 
 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	xfs_ioend_wait(ip);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	if (mp->m_flags & XFS_MOUNT_BARRIER) {
 		/*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index acca2c5..f7ce7de 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -265,7 +265,7 @@
 		return PTR_ERR(filp);
 	}
 
-	if (inode->i_mode & S_IFREG) {
+	if (S_ISREG(inode->i_mode)) {
 		filp->f_flags |= O_NOATIME;
 		filp->f_mode |= FMODE_NOCMTIME;
 	}
@@ -850,14 +850,14 @@
 		di_flags |= XFS_DIFLAG_NODEFRAG;
 	if (xflags & XFS_XFLAG_FILESTREAM)
 		di_flags |= XFS_DIFLAG_FILESTREAM;
-	if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+	if (S_ISDIR(ip->i_d.di_mode)) {
 		if (xflags & XFS_XFLAG_RTINHERIT)
 			di_flags |= XFS_DIFLAG_RTINHERIT;
 		if (xflags & XFS_XFLAG_NOSYMLINKS)
 			di_flags |= XFS_DIFLAG_NOSYMLINKS;
 		if (xflags & XFS_XFLAG_EXTSZINHERIT)
 			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+	} else if (S_ISREG(ip->i_d.di_mode)) {
 		if (xflags & XFS_XFLAG_REALTIME)
 			di_flags |= XFS_DIFLAG_REALTIME;
 		if (xflags & XFS_XFLAG_EXTSIZE)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6544c32..b9c172b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -1194,9 +1194,14 @@
 		break;
 	}
 
-	/* if there is no attribute fork no ACL can exist on this inode */
-	if (!XFS_IFORK_Q(ip))
+	/*
+	 * If there is no attribute fork no ACL can exist on this inode,
+	 * and it can't have any file capabilities attached to it either.
+	 */
+	if (!XFS_IFORK_Q(ip)) {
+		inode_has_no_xattr(inode);
 		cache_no_acl(inode);
+	}
 
 	xfs_iflags_clear(ip, XFS_INEW);
 	barrier();
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c51a3f9..ab3e5c6 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -414,7 +414,7 @@
 
 	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
 		return 0;
-	if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+	if (S_ISDIR(ip->i_d.di_mode)) {
 		mp = ip->i_mount;
 		memset(&dargs, 0, sizeof(dargs));
 		dargs.dp = ip;
@@ -3344,8 +3344,7 @@
 	 * We don't want to deal with the case of keeping inode data inline yet.
 	 * So sending the data fork of a regular inode is invalid.
 	 */
-	ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
-		 whichfork == XFS_DATA_FORK));
+	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
 	flags = 0;
@@ -4052,7 +4051,7 @@
 
 #ifndef DEBUG
 	if (whichfork == XFS_DATA_FORK) {
-		return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
+		return S_ISREG(ip->i_d.di_mode) ?
 			(ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
 			(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
 	}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2925726..5bfcb87 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -692,6 +692,24 @@
 	return(error);
 }
 
+#ifdef	DEBUG
+static void
+xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
+{
+	__be16	magic = blkinfo->magic;
+
+	if (level == 1) {
+		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	} else
+		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+	ASSERT(!blkinfo->forw);
+	ASSERT(!blkinfo->back);
+}
+#else	/* !DEBUG */
+#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
+#endif	/* !DEBUG */
+
 /*
  * We have only one entry in the root.  Copy the only remaining child of
  * the old root to block 0 as the new root node.
@@ -700,8 +718,6 @@
 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 {
 	xfs_da_intnode_t *oldroot;
-	/* REFERENCED */
-	xfs_da_blkinfo_t *blkinfo;
 	xfs_da_args_t *args;
 	xfs_dablk_t child;
 	xfs_dabuf_t *bp;
@@ -732,15 +748,9 @@
 	if (error)
 		return(error);
 	ASSERT(bp != NULL);
-	blkinfo = bp->data;
-	if (be16_to_cpu(oldroot->hdr.level) == 1) {
-		ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-		       blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
-	} else {
-		ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
-	}
-	ASSERT(!blkinfo->forw);
-	ASSERT(!blkinfo->back);
+	xfs_da_blkinfo_onlychild_validate(bp->data,
+					be16_to_cpu(oldroot->hdr.level));
+
 	memcpy(root_blk->bp->data, bp->data, state->blocksize);
 	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
 	error = xfs_da_shrink_inode(args, child, bp);
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 4580ce0..a2e2701 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -121,7 +121,7 @@
 {
 	xfs_dir2_sf_hdr_t	*sfp;
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	if (dp->i_d.di_size == 0)	/* might happen during shutdown. */
 		return 1;
 	if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -179,7 +179,7 @@
 	memset((char *)&args, 0, sizeof(args));
 	args.dp = dp;
 	args.trans = tp;
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
 		return error;
 	return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -202,7 +202,7 @@
 	int			rval;
 	int			v;		/* type-checking value */
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
 		return rval;
 	XFS_STATS_INC(xs_dir_create);
@@ -278,7 +278,7 @@
 	int		rval;
 	int		v;		/* type-checking value */
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_lookup);
 
 	memset(&args, 0, sizeof(xfs_da_args_t));
@@ -333,7 +333,7 @@
 	int		rval;
 	int		v;		/* type-checking value */
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_remove);
 
 	memset(&args, 0, sizeof(xfs_da_args_t));
@@ -382,7 +382,7 @@
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return XFS_ERROR(EIO);
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_getdents);
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -414,7 +414,7 @@
 	int		rval;
 	int		v;		/* type-checking value */
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 
 	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
 		return rval;
@@ -464,7 +464,7 @@
 	if (resblks)
 		return 0;
 
-	ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
 
 	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9124425..3ff3d9e 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -344,9 +344,9 @@
 	 * Either ip is a regular file and pip is a directory, or ip is a
 	 * directory and pip is NULL.
 	 */
-	ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
-	               (pip->i_d.di_mode & S_IFDIR)) ||
-	              ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+	ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
+	               S_ISDIR(pip->i_d.di_mode)) ||
+	              (S_ISDIR(ip->i_d.di_mode) && !pip)));
 
 	mp = ip->i_mount;
 	cache = mp->m_filestream;
@@ -537,7 +537,7 @@
 	xfs_agnumber_t	ag;
 	int		ref;
 
-	if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+	if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
 		ASSERT(0);
 		return NULLAGNUMBER;
 	}
@@ -579,9 +579,9 @@
 	xfs_agnumber_t	ag, rotorstep, startag;
 	int		err = 0;
 
-	ASSERT(pip->i_d.di_mode & S_IFDIR);
-	ASSERT(ip->i_d.di_mode & S_IFREG);
-	if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+	ASSERT(S_ISDIR(pip->i_d.di_mode));
+	ASSERT(S_ISREG(ip->i_d.di_mode));
+	if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
 		return -EINVAL;
 
 	mp = pip->i_mount;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cc21dd..2fcca4b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -368,7 +368,7 @@
 			/*
 			 * no local regular files yet
 			 */
-			if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
+			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
 				xfs_warn(ip->i_mount,
 			"corrupt inode %Lu (local format for regular file).",
 					(unsigned long long) ip->i_ino);
@@ -1040,7 +1040,7 @@
 
 	if (pip && XFS_INHERIT_GID(pip)) {
 		ip->i_d.di_gid = pip->i_d.di_gid;
-		if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
+		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
 			ip->i_d.di_mode |= S_ISGID;
 		}
 	}
@@ -1097,14 +1097,14 @@
 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 			uint	di_flags = 0;
 
-			if ((mode & S_IFMT) == S_IFDIR) {
+			if (S_ISDIR(mode)) {
 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 					ip->i_d.di_extsize = pip->i_d.di_extsize;
 				}
-			} else if ((mode & S_IFMT) == S_IFREG) {
+			} else if (S_ISREG(mode)) {
 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_REALTIME;
 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1188,7 +1188,7 @@
 	int			nimaps;
 	xfs_bmbt_irec_t		imaps[2];
 
-	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+	if (!S_ISREG(ip->i_d.di_mode))
 		return;
 
 	if (XFS_IS_REALTIME_INODE(ip))
@@ -1828,7 +1828,7 @@
 	ASSERT(ip->i_d.di_nextents == 0);
 	ASSERT(ip->i_d.di_anextents == 0);
 	ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
-	       ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
+	       (!S_ISREG(ip->i_d.di_mode)));
 	ASSERT(ip->i_d.di_nblocks == 0);
 
 	/*
@@ -2671,7 +2671,7 @@
 			__func__, ip->i_ino, ip, ip->i_d.di_magic);
 		goto corrupt_out;
 	}
-	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+	if (S_ISREG(ip->i_d.di_mode)) {
 		if (XFS_TEST_ERROR(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2681,7 +2681,7 @@
 				__func__, ip->i_ino, ip);
 			goto corrupt_out;
 		}
-	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+	} else if (S_ISDIR(ip->i_d.di_mode)) {
 		if (XFS_TEST_ERROR(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a97644a..2380a4b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -263,7 +263,7 @@
 	struct inode		i_vnode;	/* embedded VFS inode */
 } xfs_inode_t;
 
-#define XFS_ISIZE(ip)	(((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
+#define XFS_ISIZE(ip)	S_ISREG((ip)->i_d.di_mode) ? \
 				(ip)->i_size : (ip)->i_d.di_size;
 
 /* Convert from vfs inode to xfs inode */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8fe4206..052a2c0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2283,7 +2283,7 @@
 	/* Take the opportunity to reset the flush iteration count */
 	dicp->di_flushiter = 0;
 
-	if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
+	if (unlikely(S_ISREG(dicp->di_mode))) {
 		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
 			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2296,7 +2296,7 @@
 			error = EFSCORRUPTED;
 			goto error;
 		}
-	} else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
+	} else if (unlikely(S_ISDIR(dicp->di_mode))) {
 		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
 		    (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7f25245..092e16a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1331,7 +1331,7 @@
 
 	ASSERT(rip != NULL);
 
-	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
+	if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
 		xfs_warn(mp, "corrupted root inode %llu: not a directory",
 			(unsigned long long)rip->i_ino);
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 77a5989..df78c29 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -116,7 +116,7 @@
 	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
 
 	new_parent = (src_dp != target_dp);
-	src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
+	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
 
 	if (src_is_directory) {
 		/*
@@ -226,7 +226,7 @@
 		 * target and source are directories and that target can be
 		 * destroyed, or that neither is a directory.
 		 */
-		if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+		if (S_ISDIR(target_ip->i_d.di_mode)) {
 			/*
 			 * Make sure target dir is empty.
 			 */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 88d1214..9322e13 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -121,7 +121,7 @@
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 
-	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+	ASSERT(S_ISLNK(ip->i_d.di_mode));
 	ASSERT(ip->i_d.di_size <= MAXPATHLEN);
 
 	pathlen = ip->i_d.di_size;
@@ -529,7 +529,7 @@
 	if (ip->i_d.di_nlink == 0)
 		return 0;
 
-	if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+	if ((S_ISREG(ip->i_d.di_mode) &&
 	     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
 	       ip->i_delayed_blks > 0)) &&
 	     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
@@ -610,7 +610,7 @@
 	truncate = ((ip->i_d.di_nlink == 0) &&
 	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
 	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
-	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
+	    S_ISREG(ip->i_d.di_mode));
 
 	mp = ip->i_mount;
 
@@ -621,7 +621,7 @@
 		goto out;
 
 	if (ip->i_d.di_nlink != 0) {
-		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+		if ((S_ISREG(ip->i_d.di_mode) &&
                      ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
                        ip->i_delayed_blks > 0)) &&
 		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -669,7 +669,7 @@
 			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
-	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
+	} else if (S_ISLNK(ip->i_d.di_mode)) {
 
 		/*
 		 * If we get an error while cleaning up a
diff --git a/include/keys/encrypted-type.h b/include/keys/encrypted-type.h
index 9585501..1d45413 100644
--- a/include/keys/encrypted-type.h
+++ b/include/keys/encrypted-type.h
@@ -1,6 +1,11 @@
 /*
  * Copyright (C) 2010 IBM Corporation
- * Author: Mimi Zohar <zohar@us.ibm.com>
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Mimi Zohar <zohar@us.ibm.com>
+ * Roberto Sassu <roberto.sassu@polito.it>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -15,13 +20,17 @@
 
 struct encrypted_key_payload {
 	struct rcu_head rcu;
+	char *format;		/* datablob: format */
 	char *master_desc;	/* datablob: master key name */
 	char *datalen;		/* datablob: decrypted key length */
 	u8 *iv;			/* datablob: iv */
 	u8 *encrypted_data;	/* datablob: encrypted data */
 	unsigned short datablob_len;	/* length of datablob */
 	unsigned short decrypted_datalen;	/* decrypted data length */
-	u8 decrypted_data[0];	/* decrypted data +  datablob + hmac */
+	unsigned short payload_datalen;		/* payload data length */
+	unsigned short encrypted_key_format;	/* encrypted key format */
+	u8 *decrypted_data;	/* decrypted data */
+	u8 payload_data[0];	/* payload data + datablob + hmac */
 };
 
 extern struct key_type key_type_encrypted;
diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h
new file mode 100644
index 0000000..2224a8c
--- /dev/null
+++ b/include/linux/ecryptfs.h
@@ -0,0 +1,113 @@
+#ifndef _LINUX_ECRYPTFS_H
+#define _LINUX_ECRYPTFS_H
+
+/* Version verification for shared data structures w/ userspace */
+#define ECRYPTFS_VERSION_MAJOR 0x00
+#define ECRYPTFS_VERSION_MINOR 0x04
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
+/* These flags indicate which features are supported by the kernel
+ * module; userspace tools such as the mount helper read
+ * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
+ * how to behave. */
+#define ECRYPTFS_VERSIONING_PASSPHRASE            0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY                0x00000002
+#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
+#define ECRYPTFS_VERSIONING_POLICY                0x00000008
+#define ECRYPTFS_VERSIONING_XATTR                 0x00000010
+#define ECRYPTFS_VERSIONING_MULTKEY               0x00000020
+#define ECRYPTFS_VERSIONING_DEVMISC               0x00000040
+#define ECRYPTFS_VERSIONING_HMAC                  0x00000080
+#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION   0x00000100
+#define ECRYPTFS_VERSIONING_GCM                   0x00000200
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
+				  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
+				  | ECRYPTFS_VERSIONING_PUBKEY \
+				  | ECRYPTFS_VERSIONING_XATTR \
+				  | ECRYPTFS_VERSIONING_MULTKEY \
+				  | ECRYPTFS_VERSIONING_DEVMISC \
+				  | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
+#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
+#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
+#define ECRYPTFS_SALT_SIZE 8
+#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
+/* The original signature size is only for what is stored on disk; all
+ * in-memory representations are expanded hex, so it better adapted to
+ * be passed around or referenced on the command line */
+#define ECRYPTFS_SIG_SIZE 8
+#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
+#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
+#define ECRYPTFS_MAX_KEY_BYTES 64
+#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
+#define ECRYPTFS_FILE_VERSION 0x03
+#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
+
+#define RFC2440_CIPHER_DES3_EDE 0x02
+#define RFC2440_CIPHER_CAST_5 0x03
+#define RFC2440_CIPHER_BLOWFISH 0x04
+#define RFC2440_CIPHER_AES_128 0x07
+#define RFC2440_CIPHER_AES_192 0x08
+#define RFC2440_CIPHER_AES_256 0x09
+#define RFC2440_CIPHER_TWOFISH 0x0a
+#define RFC2440_CIPHER_CAST_6 0x0b
+
+#define RFC2440_CIPHER_RSA 0x01
+
+/**
+ * For convenience, we may need to pass around the encrypted session
+ * key between kernel and userspace because the authentication token
+ * may not be extractable.  For example, the TPM may not release the
+ * private key, instead requiring the encrypted data and returning the
+ * decrypted data.
+ */
+struct ecryptfs_session_key {
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
+#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
+#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
+	u32 flags;
+	u32 encrypted_key_size;
+	u32 decrypted_key_size;
+	u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+	u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
+};
+
+struct ecryptfs_password {
+	u32 password_bytes;
+	s32 hash_algo;
+	u32 hash_iterations;
+	u32 session_key_encryption_key_bytes;
+#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
+#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
+	u32 flags;
+	/* Iterated-hash concatenation of salt and passphrase */
+	u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+	/* Always in expanded hex */
+	u8 salt[ECRYPTFS_SALT_SIZE];
+};
+
+enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+
+struct ecryptfs_private_key {
+	u32 key_size;
+	u32 data_len;
+	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+	char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
+	u8 data[];
+};
+
+/* May be a password or a private key */
+struct ecryptfs_auth_tok {
+	u16 version; /* 8-bit major and 8-bit minor */
+	u16 token_type;
+#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
+	u32 flags;
+	struct ecryptfs_session_key session_key;
+	u8 reserved[32];
+	union {
+		struct ecryptfs_password password;
+		struct ecryptfs_private_key private_key;
+	} token;
+} __attribute__ ((packed));
+
+#endif /* _LINUX_ECRYPTFS_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5f523eb..f23bcb7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2310,7 +2310,8 @@
 extern void iget_failed(struct inode *);
 extern void end_writeback(struct inode *);
 extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode(struct super_block *);
+extern struct inode *new_inode_pseudo(struct super_block *sb);
+extern struct inode *new_inode(struct super_block *sb);
 extern void free_inode_nonrcu(struct inode *inode);
 extern int should_remove_suid(struct dentry *);
 extern int file_remove_suid(struct file *);
diff --git a/include/linux/if.h b/include/linux/if.h
index 3bc63e6..03489ca 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -76,6 +76,8 @@
 #define IFF_BRIDGE_PORT	0x4000		/* device used as bridge port */
 #define IFF_OVS_DATAPATH	0x8000	/* device used as Open vSwitch
 					 * datapath port */
+#define IFF_TX_SKB_SHARING	0x10000	/* The interface supports sharing
+					 * skbs on transmit */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/input.h b/include/linux/input.h
index 771d6d8..068784e 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -119,9 +119,9 @@
 #define EVIOCGSND(len)		_IOC(_IOC_READ, 'E', 0x1a, len)		/* get all sounds status */
 #define EVIOCGSW(len)		_IOC(_IOC_READ, 'E', 0x1b, len)		/* get all switch states */
 
-#define EVIOCGBIT(ev,len)	_IOC(_IOC_READ, 'E', 0x20 + ev, len)	/* get event bits */
-#define EVIOCGABS(abs)		_IOR('E', 0x40 + abs, struct input_absinfo)	/* get abs value/limits */
-#define EVIOCSABS(abs)		_IOW('E', 0xc0 + abs, struct input_absinfo)	/* set abs value/limits */
+#define EVIOCGBIT(ev,len)	_IOC(_IOC_READ, 'E', 0x20 + (ev), len)	/* get event bits */
+#define EVIOCGABS(abs)		_IOR('E', 0x40 + (abs), struct input_absinfo)	/* get abs value/limits */
+#define EVIOCSABS(abs)		_IOW('E', 0xc0 + (abs), struct input_absinfo)	/* set abs value/limits */
 
 #define EVIOCSFF		_IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect))	/* send a force effect to a force feedback device */
 #define EVIOCRMFF		_IOW('E', 0x81, int)			/* Erase a force effect */
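
The added parentheses are plain macro hygiene: without them, an argument containing a lower-precedence operator binds against the `0x40 +` instead of standing alone. An illustrative caller, with fd and use_y assumed:

	#include <linux/input.h>
	#include <sys/ioctl.h>

	struct input_absinfo info;
	/* old macro: 0x40 + use_y ? ABS_Y : ABS_X
	 *         == (0x40 + use_y) ? ABS_Y : ABS_X   -- always ABS_Y!
	 * new macro: 0x40 + (use_y ? ABS_Y : ABS_X)   -- as intended    */
	ioctl(fd, EVIOCGABS(use_y ? ABS_Y : ABS_X), &info);
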
diff --git a/include/linux/input/kxtj9.h b/include/linux/input/kxtj9.h
new file mode 100644
index 0000000..f6bac89
--- /dev/null
+++ b/include/linux/input/kxtj9.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson <chudson@kionix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __KXTJ9_H__
+#define __KXTJ9_H__
+
+#define KXTJ9_I2C_ADDR		0x0F
+
+struct kxtj9_platform_data {
+	unsigned int min_interval;	/* minimum poll interval (in milliseconds) */
+
+	/*
+	 * By default, x is axis 0, y is axis 1, z is axis 2; these can be
+	 * changed to account for sensor orientation within the host device.
+	 */
+	u8 axis_map_x;
+	u8 axis_map_y;
+	u8 axis_map_z;
+
+	/*
+	 * Each axis can be negated to account for sensor orientation within
+	 * the host device.
+	 */
+	bool negate_x;
+	bool negate_y;
+	bool negate_z;
+
+	/* CTRL_REG1: set resolution, g-range, data ready enable */
+	/* Output resolution: 8-bit valid or 12-bit valid */
+	#define RES_8BIT		0
+	#define RES_12BIT		(1 << 6)
+	u8 res_12bit;
+	/* Output g-range: +/-2g, 4g, or 8g */
+	#define KXTJ9_G_2G		0
+	#define KXTJ9_G_4G		(1 << 3)
+	#define KXTJ9_G_8G		(1 << 4)
+	u8 g_range;
+
+	/* DATA_CTRL_REG: controls the output data rate of the part */
+	#define ODR12_5F		0
+	#define ODR25F			1
+	#define ODR50F			2
+	#define ODR100F			3
+	#define ODR200F			4
+	#define ODR400F			5
+	#define ODR800F			6
+	u8 data_odr_init;
+
+	int (*init)(void);
+	void (*exit)(void);
+	int (*power_on)(void);
+	int (*power_off)(void);
+};
+#endif  /* __KXTJ9_H__ */
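
Board code fills this structure in and hands it to the driver through the usual i2c_board_info .platform_data pointer, together with KXTJ9_I2C_ADDR. A hypothetical board-file instance; the values are examples only:

	#include <linux/input/kxtj9.h>

	static struct kxtj9_platform_data demo_kxtj9_pdata = {
		.min_interval	= 5,		/* ms */
		.axis_map_x	= 0,
		.axis_map_y	= 1,
		.axis_map_z	= 2,
		.negate_z	= true,		/* sensor mounted upside down */
		.res_12bit	= RES_12BIT,
		.g_range	= KXTJ9_G_2G,
		.data_odr_init	= ODR100F,	/* 100 Hz output data rate */
	};
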
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 9a43ad7..46ac9a5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -56,6 +56,14 @@
 
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define DIV_ROUND_UP_ULL(ll,d) \
+	({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; })
+
+#if BITS_PER_LONG == 32
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d)
+#else
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
+#endif
 
 /* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
 #define roundup(x, y) (					\
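
Why the BITS_PER_LONG switch above: on 32-bit, a plain 64-bit '/' would emit a call to libgcc's __udivdi3, which the kernel does not link against, so the ULL variant routes through do_div()'s 64-by-32 division; on 64-bit the ordinary macro is cheaper. Illustrative use:

	u64 bytes   = 10000;
	u64 sectors = DIV_ROUND_UP_SECTOR_T(bytes, 512);	/* == 20 */
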
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2ed0b6c..ddee79b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1132,7 +1132,7 @@
 	spinlock_t		addr_list_lock;
 	struct netdev_hw_addr_list	uc;	/* Unicast mac addresses */
 	struct netdev_hw_addr_list	mc;	/* Multicast mac addresses */
-	int			uc_promisc;
+	bool			uc_promisc;
 	unsigned int		promiscuity;
 	unsigned int		allmulti;
 
@@ -1679,9 +1679,12 @@
 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
 					unsigned int offset)
 {
+	if (!pskb_may_pull(skb, hlen))
+		return NULL;
+
 	NAPI_GRO_CB(skb)->frag0 = NULL;
 	NAPI_GRO_CB(skb)->frag0_len = 0;
-	return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+	return skb->data + offset;
 }
 
 static inline void *skb_gro_mac_header(struct sk_buff *skb)
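
The reorder in skb_gro_header_slow() means the frag0 fast-path state is only reset once pskb_may_pull() has actually succeeded; a failed pull now leaves it untouched. The typical caller shape, loosely paraphrasing inet_gro_receive() with the header type and lengths as the caller's own:

	off  = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph  = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		iph = skb_gro_header_slow(skb, hlen, off);
		if (!iph)
			goto out;	/* pull failed; fast path intact */
	}
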
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 504b289..a3c4bc8 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -563,6 +563,9 @@
 	NFSPROC4_CLNT_GETDEVICEINFO,
 	NFSPROC4_CLNT_LAYOUTCOMMIT,
 	NFSPROC4_CLNT_LAYOUTRETURN,
+	NFSPROC4_CLNT_SECINFO_NO_NAME,
+	NFSPROC4_CLNT_TEST_STATEID,
+	NFSPROC4_CLNT_FREE_STATEID,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 08c444a..50a661f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -16,6 +16,7 @@
 struct nfs4_sequence_res;
 struct nfs_server;
 struct nfs4_minor_version_ops;
+struct server_scope;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -77,12 +78,13 @@
 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session; 	/* sharred session */
-	struct list_head	cl_layouts;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;	/* client index cache cookie */
 #endif
+
+	struct server_scope	*server_scope;	/* from exchange_id */
 };
 
 /*
@@ -149,6 +151,7 @@
 	struct rb_root		openowner_id;
 	struct rb_root		lockowner_id;
 #endif
+	struct list_head	layouts;
 	struct list_head	delegations;
 	void (*destroy)(struct nfs_server *);
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 25311b3..e2791a2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -55,20 +55,28 @@
 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
 };
 
+struct nfs_pageio_descriptor;
+struct nfs_pageio_ops {
+	void	(*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
+	bool	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	int	(*pg_doio)(struct nfs_pageio_descriptor *);
+};
+
 struct nfs_pageio_descriptor {
 	struct list_head	pg_list;
 	unsigned long		pg_bytes_written;
 	size_t			pg_count;
 	size_t			pg_bsize;
 	unsigned int		pg_base;
-	char			pg_moreio;
+	unsigned char		pg_moreio : 1,
+				pg_recoalesce : 1;
 
 	struct inode		*pg_inode;
-	int			(*pg_doio)(struct nfs_pageio_descriptor *);
+	const struct nfs_pageio_ops *pg_ops;
 	int 			pg_ioflags;
 	int			pg_error;
+	const struct rpc_call_ops *pg_rpc_callops;
 	struct pnfs_layout_segment *pg_lseg;
-	bool			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,7 +93,7 @@
 			  pgoff_t idx_start, unsigned int npages, int tag);
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
-			     int (*doio)(struct nfs_pageio_descriptor *desc),
+			     const struct nfs_pageio_ops *pg_ops,
 			     size_t bsize,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -100,7 +108,6 @@
 extern	int nfs_set_page_tag_locked(struct nfs_page *req);
 extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
 
-
 /*
  * Lock the page of an asynchronous request without getting a new reference
  */
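
With pg_doio and pg_test folded into a single const ops table, callers now hand nfs_pageio_init() one pointer instead of individual function pointers. A hedged sketch of the new shape; the names below are illustrative, not the tables the NFS client actually defines:

	static const struct nfs_pageio_ops demo_pageio_ops = {
		.pg_init = demo_pg_init,	/* prepare a request for coalescing */
		.pg_test = demo_pg_test,	/* may these two requests coalesce? */
		.pg_doio = demo_pg_doio,	/* issue the coalesced list */
	};

	nfs_pageio_init(&desc, inode, &demo_pageio_ops, PAGE_SIZE, 0);
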
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 00848d8..5b11595 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -269,9 +269,10 @@
 };
 
 struct nfs4_layoutreturn_args {
-	__u32   layout_type;
+	struct pnfs_layout_hdr *layout;
 	struct inode *inode;
 	nfs4_stateid stateid;
+	__u32   layout_type;
 	struct nfs4_sequence_args seq_args;
 };
 
@@ -1060,6 +1061,7 @@
 struct nfs41_exchange_id_res {
 	struct nfs_client		*client;
 	u32				flags;
+	struct server_scope		*server_scope;
 };
 
 struct nfs41_create_session_args {
@@ -1083,6 +1085,34 @@
 struct nfs41_reclaim_complete_res {
 	struct nfs4_sequence_res	seq_res;
 };
+
+#define SECINFO_STYLE_CURRENT_FH 0
+#define SECINFO_STYLE_PARENT 1
+struct nfs41_secinfo_no_name_args {
+	int				style;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs41_test_stateid_args {
+	nfs4_stateid			*stateid;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs41_test_stateid_res {
+	unsigned int			status;
+	struct nfs4_sequence_res	seq_res;
+};
+
+struct nfs41_free_stateid_args {
+	nfs4_stateid			*stateid;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs41_free_stateid_res {
+	unsigned int			status;
+	struct nfs4_sequence_res	seq_res;
+};
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
@@ -1096,6 +1126,7 @@
 	struct rpc_cred		*cred;
 	struct nfs_fattr	fattr;	/* fattr storage */
 	struct list_head	pages;	/* Coalesced read requests */
+	struct list_head	list;	/* lists of struct nfs_read_data */
 	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;	/* Max length of pagevec */
@@ -1119,6 +1150,7 @@
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
 	struct list_head	pages;		/* Coalesced requests we wish to flush */
+	struct list_head	list;		/* lists of struct nfs_write_data */
 	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;		/* Max length of pagevec */
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
index 76efbdd..435dd5f 100644
--- a/include/linux/pnfs_osd_xdr.h
+++ b/include/linux/pnfs_osd_xdr.h
@@ -41,9 +41,6 @@
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
-#include <scsi/osd_protocol.h>
-
-#define PNFS_OSD_OSDNAME_MAXSIZE 256
 
 /*
  * draft-ietf-nfsv4-minorversion-22
@@ -99,12 +96,6 @@
 #define _DEVID_HI(oid_device_id) \
 	(unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
 
-static inline int
-pnfs_osd_objid_xdr_sz(void)
-{
-	return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
-}
-
 enum pnfs_osd_version {
 	PNFS_OSD_MISSING              = 0,
 	PNFS_OSD_VERSION_1            = 1,
@@ -189,8 +180,6 @@
 	struct nfs4_string		oti_scsi_device_id;
 };
 
-enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
-
 /*   struct netaddr4 {
  *       // see struct rpcb in RFC1833
  *       string r_netid<>;    // network id
@@ -207,12 +196,6 @@
 	struct pnfs_osd_net_addr	ota_netaddr;
 };
 
-enum {
-	NETWORK_ID_MAX = 16 / 4,
-	UNIVERSAL_ADDRESS_MAX = 64 / 4,
-	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
-};
-
 struct pnfs_osd_deviceaddr {
 	struct pnfs_osd_targetid	oda_targetid;
 	struct pnfs_osd_targetaddr	oda_targetaddr;
@@ -222,15 +205,6 @@
 	struct nfs4_string		oda_osdname;
 };
 
-enum {
-	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
-	PNFS_OSD_DEVICEADDR_MAX =
-		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
-		2 /*oda_lun*/ +
-		1 + OSD_SYSTEMID_LEN +
-		1 + ODA_OSDNAME_MAX,
-};
-
 /* LAYOUTCOMMIT: layoutupdate */
 
 /*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
@@ -279,7 +253,7 @@
 	u32			oer_errno;
 };
 
-/* OSD XDR API */
+/* OSD XDR Client API */
 /* Layout helpers */
 /* Layout decoding is done in two parts:
  * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
@@ -337,8 +311,7 @@
 pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
 				 struct pnfs_osd_layoutupdate *lou);
 
-/* osd_ioerror encoding/decoding (layout_return) */
-/* Client */
+/* osd_ioerror encoding (layout_return) */
 extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
 extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
 
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 650af6d..643b96c 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -50,8 +50,6 @@
 
 struct proc_dir_entry {
 	unsigned int low_ino;
-	unsigned int namelen;
-	const char *name;
 	mode_t mode;
 	nlink_t nlink;
 	uid_t uid;
@@ -73,9 +71,11 @@
 	write_proc_t *write_proc;
 	atomic_t count;		/* use count */
 	int pde_users;	/* number of callers into module in progress */
-	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
 	struct completion *pde_unload_completion;
 	struct list_head pde_openers;	/* who did ->open, but not ->release */
+	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	u8 namelen;
+	char name[];
 };
 
 enum kcore_type {
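
With name[] moved to a flexible tail (and namelen shrunk to a u8), an entry and its name can come from a single allocation. A minimal sketch of the idiom, not proc's actual allocator:

	struct proc_dir_entry *pde;
	size_t len = strlen(name);

	if (len > 255)			/* namelen is only a u8 */
		return NULL;
	pde = kzalloc(sizeof(*pde) + len + 1, GFP_KERNEL);
	if (!pde)
		return NULL;
	memcpy(pde->name, name, len + 1);	/* lands in the flexible array */
	pde->namelen = len;
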
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 75cbf4f..9e65d9e 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -245,10 +245,16 @@
 	__u8	device_uuid[16]; /* user-space setable, ignored by kernel */
 	__u8	devflags;	/* per-device flags.  Only one defined...*/
 #define	WriteMostly1	1	/* mask for writemostly flag in above */
-	__u8	pad2[64-57];	/* set to 0 when writing */
+	/* Bad block log.  If there are any bad blocks the feature flag is set.
+	 * If offset and size are non-zero, that space is reserved and available
+	 */
+	__u8	bblog_shift;	/* shift from sectors to block size */
+	__le16	bblog_size;	/* number of sectors reserved for list */
+	__le32	bblog_offset;	/* sector offset from superblock to bblog,
+				 * signed - not unsigned */
 
 	/* array state information - 64 bytes */
-	__le64	utime;		/* 40 bits second, 24 btes microseconds */
+	__le64	utime;		/* 40 bits second, 24 bits microseconds */
 	__le64	events;		/* incremented when superblock updated */
 	__le64	resync_offset;	/* data before this offset (from data_offset) known to be in sync */
 	__le32	sb_csum;	/* checksum up to devs[max_dev] */
@@ -270,8 +276,8 @@
 					   * must be honoured
 					   */
 #define	MD_FEATURE_RESHAPE_ACTIVE	4
+#define	MD_FEATURE_BAD_BLOCKS		8 /* badblock list is not empty */
 
-#define	MD_FEATURE_ALL			(1|2|4)
+#define	MD_FEATURE_ALL			(1|2|4|8)
 
 #endif 
-
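
A superblock reader can gate bad-block handling on the new feature bit; a minimal sketch, with sb assumed to point at a struct mdp_superblock_1:

	if (le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) {
		s32 offset  = (s32)le32_to_cpu(sb->bblog_offset); /* signed, per comment */
		u16 sectors = le16_to_cpu(sb->bblog_size);
		/* the bad-block list lives 'offset' sectors from the
		 * superblock and occupies 'sectors' sectors */
	}
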
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 0828842..f7f3ce34 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -31,7 +31,7 @@
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
@@ -47,7 +47,7 @@
 		return 1;
 	return 0;
 }
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
 {
@@ -62,6 +62,6 @@
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 {
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 #endif /* _LINUX_SUNRPC_BC_XPRT_H */
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index fe2d8e6..e775689 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -227,6 +227,10 @@
 void		rpc_destroy_wait_queue(struct rpc_wait_queue *);
 void		rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
 					rpc_action action);
+void		rpc_sleep_on_priority(struct rpc_wait_queue *,
+					struct rpc_task *,
+					rpc_action action,
+					int priority);
 void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 					struct rpc_task *);
 void		rpc_wake_up(struct rpc_wait_queue *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2f1e518..223588a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -92,7 +92,7 @@
 	struct module *		sv_module;	/* optional module to count when
 						 * adding threads */
 	svc_thread_fn		sv_function;	/* main function for threads */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct list_head	sv_cb_list;	/* queue for callback requests
 						 * that arrive over the same
 						 * connection */
@@ -100,7 +100,7 @@
 	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
 						 * entries in the svc_cb_list */
 	struct svc_xprt		*sv_bc_xprt;	/* callback on fore channel */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 
 /*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 81cce3b..15518a1 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -22,6 +22,7 @@
 #define RPC_MIN_SLOT_TABLE	(2U)
 #define RPC_DEF_SLOT_TABLE	(16U)
 #define RPC_MAX_SLOT_TABLE	(128U)
+#define RPC_MAX_SLOT_TABLE_LIMIT	(65536U)
 
 /*
  * This describes a timeout strategy
@@ -100,18 +101,18 @@
 	ktime_t			rq_xtime;	/* transmit time stamp */
 	int			rq_ntrans;
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct list_head	rq_bc_list;	/* Callback service list */
 	unsigned long		rq_bc_pa_state;	/* Backchannel prealloc state */
 	struct list_head	rq_bc_pa_list;	/* Backchannel prealloc list */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 #define rq_svec			rq_snd_buf.head
 #define rq_slen			rq_snd_buf.len
 
 struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
-	int		(*reserve_xprt)(struct rpc_task *task);
+	int		(*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*rpcbind)(struct rpc_task *task);
 	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
@@ -164,12 +165,12 @@
 
 	struct rpc_wait_queue	binding;	/* requests waiting on rpcbind */
 	struct rpc_wait_queue	sending;	/* requests waiting to send */
-	struct rpc_wait_queue	resend;		/* requests waiting to resend */
 	struct rpc_wait_queue	pending;	/* requests in flight */
 	struct rpc_wait_queue	backlog;	/* waiting for slot */
 	struct list_head	free;		/* free slots */
-	struct rpc_rqst *	slot;		/* slot table storage */
-	unsigned int		max_reqs;	/* total slots */
+	unsigned int		max_reqs;	/* max number of slots */
+	unsigned int		min_reqs;	/* min number of slots */
+	atomic_t		num_reqs;	/* total slots */
 	unsigned long		state;		/* transport state */
 	unsigned char		shutdown   : 1,	/* being shut down */
 				resvport   : 1; /* use a reserved port */
@@ -200,7 +201,7 @@
 	u32			xid;		/* Next XID value to use */
 	struct rpc_task *	snd_task;	/* Task blocked in send */
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct svc_serv		*bc_serv;       /* The RPC service which will */
 						/* process the callback */
 	unsigned int		bc_alloc_count;	/* Total number of preallocs */
@@ -208,7 +209,7 @@
 						 * items */
 	struct list_head	bc_pa_list;	/* List of preallocated
 						 * backchannel rpc_rqst's */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 	struct list_head	recv;
 
 	struct {
@@ -228,15 +229,15 @@
 	const char		*address_strings[RPC_DISPLAY_MAX];
 };
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Backchannel flags
  */
 #define	RPC_BC_PA_IN_USE	0x0001		/* Preallocated backchannel */
 						/* buffer in use */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static inline int bc_prealloc(struct rpc_rqst *req)
 {
 	return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@@ -246,7 +247,7 @@
 {
 	return 0;
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 struct xprt_create {
 	int			ident;		/* XPRT_TRANSPORT identifier */
@@ -271,8 +272,8 @@
 struct rpc_xprt		*xprt_create_transport(struct xprt_create *args);
 void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
-int			xprt_reserve_xprt(struct rpc_task *task);
-int			xprt_reserve_xprt_cong(struct rpc_task *task);
+int			xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
+int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
@@ -282,7 +283,9 @@
 void			xprt_release(struct rpc_task *task);
 struct rpc_xprt *	xprt_get(struct rpc_xprt *xprt);
 void			xprt_put(struct rpc_xprt *xprt);
-struct rpc_xprt *	xprt_alloc(struct net *net, int size, int max_req);
+struct rpc_xprt *	xprt_alloc(struct net *net, size_t size,
+				unsigned int num_prealloc,
+				unsigned int max_req);
 void			xprt_free(struct rpc_xprt *);
 
 static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
@@ -321,7 +324,6 @@
 #define XPRT_CLOSING		(6)
 #define XPRT_CONNECTION_ABORT	(7)
 #define XPRT_CONNECTION_CLOSE	(8)
-#define XPRT_INITIALIZED	(9)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
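
Transports now pass xprt_alloc() both a preallocation count and the ceiling the slot table may dynamically grow to (bounded by the new RPC_MAX_SLOT_TABLE_LIMIT). An illustrative call, not lifted from a real transport:

	xprt = xprt_alloc(args->net, sizeof(struct sock_xprt),
			  xprt_tcp_slot_table_entries,	/* preallocated slots */
			  RPC_MAX_SLOT_TABLE_LIMIT);	/* dynamic maximum */
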
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 011bcfe..111843f 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -59,6 +59,84 @@
 #define WATCHDOG_NOWAYOUT	0
 #endif
 
+struct watchdog_ops;
+struct watchdog_device;
+
+/** struct watchdog_ops - The watchdog-devices operations
+ *
+ * @owner:	The module owner.
+ * @start:	The routine for starting the watchdog device.
+ * @stop:	The routine for stopping the watchdog device.
+ * @ping:	The routine that sends a keepalive ping to the watchdog device.
+ * @status:	The routine that shows the status of the watchdog device.
+ * @set_timeout:The routine for setting the watchdog device's timeout value.
+ * @ioctl:	The routine that handles extra ioctl calls.
+ *
+ * The watchdog_ops structure contains a list of low-level operations
+ * that control a watchdog device. It also contains the module that owns
+ * these operations. The start and stop functions are mandatory; all other
+ * functions are optional.
+ */
+struct watchdog_ops {
+	struct module *owner;
+	/* mandatory operations */
+	int (*start)(struct watchdog_device *);
+	int (*stop)(struct watchdog_device *);
+	/* optional operations */
+	int (*ping)(struct watchdog_device *);
+	unsigned int (*status)(struct watchdog_device *);
+	int (*set_timeout)(struct watchdog_device *, unsigned int);
+	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+/** struct watchdog_device - The structure that defines a watchdog device
+ *
+ * @info:	Pointer to a watchdog_info structure.
+ * @ops:	Pointer to the list of watchdog operations.
+ * @bootstatus:	Status of the watchdog device at boot.
+ * @timeout:	The watchdog device's timeout value.
+ * @min_timeout:The watchdog device's minimum timeout value.
+ * @max_timeout:The watchdog device's maximum timeout value.
+ * @driver_data:Pointer to the driver's private data.
+ * @status:	Field that contains the device's internal status bits.
+ *
+ * The watchdog_device structure contains all information about a
+ * watchdog timer device.
+ *
+ * The driver_data field may not be accessed directly. It must be accessed
+ * via the watchdog_set_drvdata and watchdog_get_drvdata helpers.
+ */
+struct watchdog_device {
+	const struct watchdog_info *info;
+	const struct watchdog_ops *ops;
+	unsigned int bootstatus;
+	unsigned int timeout;
+	unsigned int min_timeout;
+	unsigned int max_timeout;
+	void *driver_data;
+	unsigned long status;
+/* Bit numbers for status flags */
+#define WDOG_ACTIVE		0	/* Is the watchdog running/active */
+#define WDOG_DEV_OPEN		1	/* Opened via /dev/watchdog ? */
+#define WDOG_ALLOW_RELEASE	2	/* Did we receive the magic char ? */
+#define WDOG_NO_WAY_OUT		3	/* Is 'nowayout' feature set ? */
+};
+
+/* Use the following functions to manipulate watchdog driver specific data */
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+{
+	wdd->driver_data = data;
+}
+
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+{
+	return wdd->driver_data;
+}
+
+/* drivers/watchdog/core/watchdog_core.c */
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
 #endif	/* __KERNEL__ */
 
 #endif  /* ifndef _LINUX_WATCHDOG_H */
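
The new core reduces a watchdog driver to an ops table plus a device description. A minimal sketch against this API; every name here is hypothetical and the hardware accesses are elided:

	#include <linux/module.h>
	#include <linux/watchdog.h>

	static int demo_wdt_start(struct watchdog_device *wdd)
	{
		/* kick the hardware counter into action */
		return 0;
	}

	static int demo_wdt_stop(struct watchdog_device *wdd)
	{
		/* halt the counter */
		return 0;
	}

	static const struct watchdog_info demo_wdt_info = {
		.options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
		.identity = "demo_wdt",
	};

	static const struct watchdog_ops demo_wdt_ops = {
		.owner = THIS_MODULE,
		.start = demo_wdt_start,	/* mandatory */
		.stop  = demo_wdt_stop,		/* mandatory */
	};

	static struct watchdog_device demo_wdt = {
		.info        = &demo_wdt_info,
		.ops         = &demo_wdt_ops,
		.timeout     = 30,
		.min_timeout = 1,
		.max_timeout = 255,
	};

	/* probe:  watchdog_register_device(&demo_wdt);
	 * remove: watchdog_unregister_device(&demo_wdt); */
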
diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h
index 38e8c4d..fd98bb9 100644
--- a/include/linux/wm97xx.h
+++ b/include/linux/wm97xx.h
@@ -38,7 +38,11 @@
 #define WM97XX_ADCSEL_X		0x1000	/* x coord measurement */
 #define WM97XX_ADCSEL_Y		0x2000	/* y coord measurement */
 #define WM97XX_ADCSEL_PRES	0x3000	/* pressure measurement */
-#define WM97XX_ADCSEL_MASK	0x7000
+#define WM97XX_AUX_ID1		0x4000
+#define WM97XX_AUX_ID2		0x5000
+#define WM97XX_AUX_ID3		0x6000
+#define WM97XX_AUX_ID4		0x7000
+#define WM97XX_ADCSEL_MASK	0x7000	/* ADC selection mask */
 #define WM97XX_COO		0x0800	/* enable coordinate mode */
 #define WM97XX_CTC		0x0400	/* enable continuous mode */
 #define WM97XX_CM_RATE_93	0x0000	/* 93.75Hz continuous rate */
@@ -61,13 +65,6 @@
 #define WM97XX_PRP_DET_DIG	0xc000	/* detect on, digitise on */
 #define WM97XX_RPR		0x2000	/* wake up on pen down */
 #define WM97XX_PEN_DOWN		0x8000	/* pen is down */
-#define WM97XX_ADCSRC_MASK	0x7000	/* ADC source mask */
-
-#define WM97XX_AUX_ID1		0x8001
-#define WM97XX_AUX_ID2		0x8002
-#define WM97XX_AUX_ID3		0x8003
-#define WM97XX_AUX_ID4		0x8004
-
 
 /* WM9712 Bits */
 #define WM9712_45W		0x1000	/* set for 5-wire touchscreen */
diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
index ea68b3c..988ba06 100644
--- a/include/scsi/iscsi_proto.h
+++ b/include/scsi/iscsi_proto.h
@@ -29,10 +29,40 @@
 /* default iSCSI listen port for incoming connections */
 #define ISCSI_LISTEN_PORT	3260
 
+/* iSCSI header length */
+#define ISCSI_HDR_LEN		48
+
+/* iSCSI CRC32C length */
+#define ISCSI_CRC_LEN		4
+
 /* Padding word length */
 #define ISCSI_PAD_LEN		4
 
 /*
+ * Serial Number Arithmetic, 32 bits, RFC1982
+ */
+
+static inline int iscsi_sna_lt(u32 n1, u32 n2)
+{
+	return (s32)(n1 - n2) < 0;
+}
+
+static inline int iscsi_sna_lte(u32 n1, u32 n2)
+{
+	return (s32)(n1 - n2) <= 0;
+}
+
+static inline int iscsi_sna_gt(u32 n1, u32 n2)
+{
+	return (s32)(n1 - n2) > 0;
+}
+
+static inline int iscsi_sna_gte(u32 n1, u32 n2)
+{
+	return (s32)(n1 - n2) >= 0;
+}
+
+/*
  * useful common (control and data paths) macros
  */
 #define ntoh24(p) (((p)[0] << 16) | ((p)[1] << 8) | ((p)[2]))
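
The signed-subtraction trick in the helpers above implements RFC 1982 serial number comparison across 32-bit wraparound: two's-complement subtraction yields a small signed distance even when the raw values straddle the wrap point. For example:

	u32 a = 0xfffffffe, b = 0x00000001;	/* b is 3 steps after a */
	iscsi_sna_lt(a, b);	/* 1: (s32)(a - b) == -3 < 0 */
	iscsi_sna_gt(b, a);	/* 1: (s32)(b - a) ==  3 > 0 */
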
@@ -116,7 +146,7 @@
 #define ISCSI_CDB_SIZE			16
 
 /* iSCSI PDU Header */
-struct iscsi_cmd {
+struct iscsi_scsi_req {
 	uint8_t opcode;
 	uint8_t flags;
 	__be16 rsvd2;
@@ -161,7 +191,7 @@
 };
 
 /* SCSI Response Header */
-struct iscsi_cmd_rsp {
+struct iscsi_scsi_rsp {
 	uint8_t opcode;
 	uint8_t flags;
 	uint8_t response;
@@ -406,7 +436,7 @@
 };
 
 /* Login Header */
-struct iscsi_login {
+struct iscsi_login_req {
 	uint8_t opcode;
 	uint8_t flags;
 	uint8_t max_version;	/* Max. version supported */
@@ -427,7 +457,13 @@
 #define ISCSI_FLAG_LOGIN_TRANSIT		0x80
 #define ISCSI_FLAG_LOGIN_CONTINUE		0x40
 #define ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK	0x0C	/* 2 bits */
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE1		0x04
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE2		0x08
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE3		0x0C
 #define ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK	0x03	/* 2 bits */
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE1		0x01
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE2		0x02
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE3		0x03
 
 #define ISCSI_LOGIN_CURRENT_STAGE(flags) \
 	((flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2)
@@ -550,17 +586,25 @@
 struct iscsi_snack {
 	uint8_t opcode;
 	uint8_t flags;
-	uint8_t rsvd2[14];
+	uint8_t rsvd2[2];
+	uint8_t hlength;
+	uint8_t dlength[3];
+	uint8_t lun[8];
 	itt_t	 itt;
+	__be32  ttt;
+	uint8_t rsvd3[4];
+	__be32  exp_statsn;
+	uint8_t rsvd4[8];
 	__be32	begrun;
 	__be32	runlength;
-	__be32	exp_statsn;
-	__be32	rsvd3;
-	__be32	exp_datasn;
-	uint8_t rsvd6[8];
 };
 
 /* SNACK PDU flags */
+#define ISCSI_FLAG_SNACK_TYPE_DATA		0
+#define ISCSI_FLAG_SNACK_TYPE_R2T		0
+#define ISCSI_FLAG_SNACK_TYPE_STATUS		1
+#define ISCSI_FLAG_SNACK_TYPE_DATA_ACK		2
+#define ISCSI_FLAG_SNACK_TYPE_RDATA		3
 #define ISCSI_FLAG_SNACK_TYPE_MASK	0x0F	/* 4 bits */
 
 /* Reject Message Header */
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index e1bad11..57e71fa 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -507,6 +507,18 @@
 void snd_pcm_vma_notify_data(void *client, void *data);
 int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area);
 
+
+#ifdef CONFIG_SND_DEBUG
+void snd_pcm_debug_name(struct snd_pcm_substream *substream,
+			   char *name, size_t len);
+#else
+static inline void
+snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
+{
+	*buf = 0;
+}
+#endif
+
 /*
  *  PCM library
  */
@@ -749,17 +761,18 @@
 	return &params->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL];
 }
 
-#define params_access(p) ((__force snd_pcm_access_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_ACCESS)))
-#define params_format(p) ((__force snd_pcm_format_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_FORMAT)))
-#define params_subformat(p) snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_SUBFORMAT))
-#define params_channels(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min
-#define params_rate(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_RATE)->min
-#define params_period_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min
-#define params_period_bytes(p) ((params_period_size(p)*snd_pcm_format_physical_width(params_format(p))*params_channels(p))/8)
-#define params_periods(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIODS)->min
-#define params_buffer_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min
-#define params_buffer_bytes(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min
-
+#define params_channels(p) \
+	(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min)
+#define params_rate(p) \
+	(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_RATE)->min)
+#define params_period_size(p) \
+	(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min)
+#define params_periods(p) \
+	(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIODS)->min)
+#define params_buffer_size(p) \
+	(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min)
+#define params_buffer_bytes(p) \
+	(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min)
 
 int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v);
 void snd_interval_mul(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c);
diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h
index 85cf1cf..f494f1e 100644
--- a/include/sound/pcm_params.h
+++ b/include/sound/pcm_params.h
@@ -337,5 +337,19 @@
 	return 0;
 }
 
-#endif /* __SOUND_PCM_PARAMS_H */
+#define params_access(p) ((__force snd_pcm_access_t)\
+		snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_ACCESS)))
+#define params_format(p) ((__force snd_pcm_format_t)\
+		snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_FORMAT)))
+#define params_subformat(p) \
+	snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_SUBFORMAT))
 
+static inline unsigned int
+params_period_bytes(const struct snd_pcm_hw_params *p)
+{
+	return (params_period_size(p) *
+		snd_pcm_format_physical_width(params_format(p)) *
+		params_channels(p)) / 8;
+}
+
+#endif /* __SOUND_PCM_PARAMS_H */
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index e09505c..e0583b7 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -266,6 +266,12 @@
 	.get = snd_soc_dapm_get_enum_virt, \
 	.put = snd_soc_dapm_put_enum_virt, \
 	.private_value = (unsigned long)&xenum }
+#define SOC_DAPM_ENUM_EXT(xname, xenum, xget, xput) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+	.info = snd_soc_info_enum_double, \
+	.get = xget, \
+	.put = xput, \
+	.private_value = (unsigned long)&xenum }
 #define SOC_DAPM_VALUE_ENUM(xname, xenum) \
 {	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
 	.info = snd_soc_info_enum_double, \
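
The new SOC_DAPM_ENUM_EXT macro lets a codec driver supply its own get/put handlers while keeping the stock enum info callback. A hypothetical control; demo_enum is a struct soc_enum defined elsewhere:

	static int demo_mux_get(struct snd_kcontrol *kcontrol,
				struct snd_ctl_elem_value *ucontrol);
	static int demo_mux_put(struct snd_kcontrol *kcontrol,
				struct snd_ctl_elem_value *ucontrol);

	static const struct snd_kcontrol_new demo_mux_control =
		SOC_DAPM_ENUM_EXT("Demo Route", demo_enum,
				  demo_mux_get, demo_mux_put);
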
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9844580..1d2b6ce 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -27,9 +27,11 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/cred.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/init_task.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mm.h>
@@ -1514,6 +1516,7 @@
 		struct cgroup *root_cgrp = &root->top_cgroup;
 		struct inode *inode;
 		struct cgroupfs_root *existing_root;
+		const struct cred *cred;
 		int i;
 
 		BUG_ON(sb->s_root != NULL);
@@ -1593,7 +1596,9 @@
 		BUG_ON(!list_empty(&root_cgrp->children));
 		BUG_ON(root->number_of_cgroups != 1);
 
+		cred = override_creds(&init_cred);
 		cgroup_populate_dir(root_cgrp);
+		revert_creds(cred);
 		mutex_unlock(&cgroup_mutex);
 		mutex_unlock(&inode->i_mutex);
 	} else {
diff --git a/kernel/compat.c b/kernel/compat.c
index 18197ae..616c781 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -992,11 +992,8 @@
 	sigset_from_compat(&newset, &newset32);
 	sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
-	spin_lock_irq(&current->sighand->siglock);
 	current->saved_sigmask = current->blocked;
-	current->blocked = newset;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&newset);
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
diff --git a/kernel/signal.c b/kernel/signal.c
index d7f70ae..291c970 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3102,15 +3102,11 @@
 
 SYSCALL_DEFINE1(ssetmask, int, newmask)
 {
-	int old;
+	int old = current->blocked.sig[0];
+	sigset_t newset;
 
-	spin_lock_irq(&current->sighand->siglock);
-	old = current->blocked.sig[0];
-
-	siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
-						  sigmask(SIGSTOP)));
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP)));
+	set_current_blocked(&newset);
 
 	return old;
 }
@@ -3167,11 +3163,8 @@
 		return -EFAULT;
 	sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
-	spin_lock_irq(&current->sighand->siglock);
 	current->saved_sigmask = current->blocked;
-	current->blocked = newset;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&newset);
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 934e221..9d40a07 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -695,7 +695,7 @@
 	ether_setup(dev);
 
 	dev->priv_flags		|= IFF_802_1Q_VLAN;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	dev->priv_flags		&= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->tx_queue_len	= 0;
 
 	dev->netdev_ops		= &vlan_netdev_ops;
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 8c100c9..d4f5dff 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -231,6 +231,7 @@
 	dev->addr_len = ETH_ALEN;
 
 	ether_setup(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops = &bnep_netdev_ops;
 
 	dev->watchdog_timeo  = HZ * 2;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9444c5c..17d67b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4497,10 +4497,10 @@
 		 */
 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
 			__dev_set_promiscuity(dev, 1);
-			dev->uc_promisc = 1;
+			dev->uc_promisc = true;
 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
 			__dev_set_promiscuity(dev, -1);
-			dev->uc_promisc = 0;
+			dev->uc_promisc = false;
 		}
 
 		if (ops->ndo_set_multicast_list)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f76079c..e35a6fb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1070,7 +1070,9 @@
 		len = num_arg(&user_buffer[i], 10, &value);
 		if (len < 0)
 			return len;
-
+		if ((value > 0) &&
+		    (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+			return -ENOTSUPP;
 		i += len;
 		pkt_dev->clone_skb = value;
 
@@ -3555,7 +3557,6 @@
 	pkt_dev->min_pkt_size = ETH_ZLEN;
 	pkt_dev->max_pkt_size = ETH_ZLEN;
 	pkt_dev->nfrags = 0;
-	pkt_dev->clone_skb = pg_clone_skb_d;
 	pkt_dev->delay = pg_delay_d;
 	pkt_dev->count = pg_count_d;
 	pkt_dev->sofar = 0;
@@ -3563,7 +3564,6 @@
 	pkt_dev->udp_src_max = 9;
 	pkt_dev->udp_dst_min = 9;
 	pkt_dev->udp_dst_max = 9;
-
 	pkt_dev->vlan_p = 0;
 	pkt_dev->vlan_cfi = 0;
 	pkt_dev->vlan_id = 0xffff;
@@ -3575,6 +3575,8 @@
 	err = pktgen_setup_dev(pkt_dev, ifname);
 	if (err)
 		goto out1;
+	if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
+		pkt_dev->clone_skb = pg_clone_skb_d;
 
 	pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
 					  &pktgen_if_fops, pkt_dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5cffb63..27997d3 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -231,6 +231,7 @@
  * eth_header_cache - fill cache entry from neighbour
  * @neigh: source neighbour
  * @hh: destination cache entry
+ * @type: Ethernet type field
  * Create an Ethernet header template from the neighbour.
  */
 int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
@@ -339,6 +340,7 @@
 	dev->addr_len		= ETH_ALEN;
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
+	dev->priv_flags		= IFF_TX_SKB_SHARING;
 
 	memset(dev->broadcast, 0xFF, ETH_ALEN);
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 37b3c18..bc19bd0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1134,15 +1134,15 @@
 					struct in_device *in_dev)
 
 {
-	struct in_ifaddr *ifa = in_dev->ifa_list;
+	struct in_ifaddr *ifa;
 
-	if (!ifa)
-		return;
-
-	arp_send(ARPOP_REQUEST, ETH_P_ARP,
-		 ifa->ifa_local, dev,
-		 ifa->ifa_local, NULL,
-		 dev->dev_addr, NULL);
+	for (ifa = in_dev->ifa_list; ifa;
+	     ifa = ifa->ifa_next) {
+		arp_send(ARPOP_REQUEST, ETH_P_ARP,
+			 ifa->ifa_local, dev,
+			 ifa->ifa_local, NULL,
+			 dev->dev_addr, NULL);
+	}
 }
 
 /* Called only under RTNL semaphore */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a06c53c..a55500c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1481,6 +1481,8 @@
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
+	if (ifp->prefix_len == 127) /* RFC 6164 */
+		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
 	if (ipv6_addr_any(&addr))
 		return;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index a8193f5..d2726a7 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -103,7 +103,7 @@
 static void l2tp_eth_dev_setup(struct net_device *dev)
 {
 	ether_setup(dev);
-
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops		= &l2tp_eth_netdev_ops;
 	dev->destructor		= free_netdev;
 }
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index cd5fb40..556e7e6 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -698,6 +698,7 @@
 static void ieee80211_if_setup(struct net_device *dev)
 {
 	ether_setup(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops = &ieee80211_dataif_ops;
 	dev->destructor = free_netdev;
 }
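
ether_setup() now sets IFF_TX_SKB_SHARING by default (see the net/ethernet/eth.c hunk below), advertising that the driver can transmit skbs that are still referenced elsewhere; pktgen's clone_skb depends on this and now returns -ENOTSUPP on devices lacking the flag. Virtual devices such as VLAN, BNEP, l2tp and mac80211 interfaces modify the skb on transmit, so each clears the flag right after ether_setup(). A sketch of the pattern for a hypothetical virtual driver:

static void my_virt_setup(struct net_device *dev)	/* hypothetical */
{
	ether_setup(dev);	/* sets IFF_TX_SKB_SHARING in priv_flags */
	/*
	 * This driver's ndo_start_xmit() rewrites headers in place,
	 * so shared skbs (e.g. pktgen's clone_skb) must not reach it.
	 */
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
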
diff --git a/net/socket.c b/net/socket.c
index 02dc82d..b1cbbcd 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -467,7 +467,7 @@
 	struct inode *inode;
 	struct socket *sock;
 
-	inode = new_inode(sock_mnt->mnt_sb);
+	inode = new_inode_pseudo(sock_mnt->mnt_sb);
 	if (!inode)
 		return NULL;
 
@@ -580,7 +580,7 @@
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
-int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
+static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct kiocb iocb;
 	struct sock_iocb siocb;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index b2198e6..ffd243d 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -4,6 +4,10 @@
 config SUNRPC_GSS
 	tristate
 
+config SUNRPC_BACKCHANNEL
+	bool
+	depends on SUNRPC
+
 config SUNRPC_XPRT_RDMA
 	tristate
 	depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9d2fca5..8209a04 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -13,6 +13,6 @@
 	    addr.o rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
 	    svc_xprt.o
-sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index cf06af3..91eaa26 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -29,8 +29,6 @@
 #define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
-#if defined(CONFIG_NFS_V4_1)
-
 /*
  * Helper routines that track the number of preallocation elements
  * on the transport.
@@ -174,7 +172,7 @@
 	dprintk("RPC:       setup backchannel transport failed\n");
 	return -1;
 }
-EXPORT_SYMBOL(xprt_setup_backchannel);
+EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
 
 /*
  * Destroys the backchannel preallocated structures.
@@ -204,7 +202,7 @@
 	dprintk("RPC:        backchannel list empty= %s\n",
 		list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
-EXPORT_SYMBOL(xprt_destroy_backchannel);
+EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
 
 /*
  * One or more rpc_rqst structure have been preallocated during the
@@ -279,4 +277,3 @@
 	spin_unlock_bh(&xprt->bc_pa_lock);
 }
 
-#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 1dd1a68..0b2eb38 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -27,8 +27,6 @@
  * reply over an existing open connection previously established by the client.
  */
 
-#if defined(CONFIG_NFS_V4_1)
-
 #include <linux/module.h>
 
 #include <linux/sunrpc/xprt.h>
@@ -63,4 +61,3 @@
 	return ret;
 }
 
-#endif /* CONFIG_NFS_V4_1 */
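
With the #if defined(CONFIG_NFS_V4_1) guards removed from backchannel_rqst.c and bc_svc.c, conditional compilation moves entirely into the Makefile (sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) above). Callers keep compiling either way through the usual stub-header idiom; a hedged sketch of what the companion header might provide (signatures approximate):

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs);
void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs);
#else
static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
					 unsigned int min_reqs)
{
	return 0;
}
#endif
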
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c50818f..c5347d2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -64,9 +64,9 @@
 static void	call_bind(struct rpc_task *task);
 static void	call_bind_status(struct rpc_task *task);
 static void	call_transmit(struct rpc_task *task);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static void	call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 static void	call_status(struct rpc_task *task);
 static void	call_transmit_status(struct rpc_task *task);
 static void	call_refresh(struct rpc_task *task);
@@ -715,7 +715,7 @@
 }
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
@@ -758,7 +758,7 @@
 	dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
 	return task;
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 void
 rpc_call_start(struct rpc_task *task)
@@ -1361,7 +1361,7 @@
 	}
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * 5b.	Send the backchannel RPC reply.  On error, drop the reply.  In
  * addition, disconnect on connectivity errors.
@@ -1425,7 +1425,7 @@
 	}
 	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * 6.	Sort out the RPC call status
@@ -1550,8 +1550,7 @@
 	kxdrdproc_t	decode = task->tk_msg.rpc_proc->p_decode;
 	__be32		*p;
 
-	dprintk("RPC: %5u call_decode (status %d)\n",
-			task->tk_pid, task->tk_status);
+	dprint_status(task);
 
 	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
 		if (clnt->cl_chatty)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 4814e24..d12ffa5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -97,14 +97,16 @@
 /*
  * Add new request to a priority queue.
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+		struct rpc_task *task,
+		unsigned char queue_priority)
 {
 	struct list_head *q;
 	struct rpc_task *t;
 
 	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	q = &queue->tasks[task->tk_priority];
-	if (unlikely(task->tk_priority > queue->maxpriority))
+	q = &queue->tasks[queue_priority];
+	if (unlikely(queue_priority > queue->maxpriority))
 		q = &queue->tasks[queue->maxpriority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
@@ -123,12 +125,14 @@
  * improve overall performance.
  * Everyone else gets appended to the queue to ensure proper FIFO behavior.
  */
-static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+		struct rpc_task *task,
+		unsigned char queue_priority)
 {
 	BUG_ON (RPC_IS_QUEUED(task));
 
 	if (RPC_IS_PRIORITY(queue))
-		__rpc_add_wait_queue_priority(queue, task);
+		__rpc_add_wait_queue_priority(queue, task, queue_priority);
 	else if (RPC_IS_SWAPPER(task))
 		list_add(&task->u.tk_wait.list, &queue->tasks[0]);
 	else
@@ -311,13 +315,15 @@
  * NB: An RPC task will only receive interrupt-driven events as long
  * as it's on a wait queue.
  */
-static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
-			rpc_action action)
+static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
+		struct rpc_task *task,
+		rpc_action action,
+		unsigned char queue_priority)
 {
 	dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
 			task->tk_pid, rpc_qname(q), jiffies);
 
-	__rpc_add_wait_queue(q, task);
+	__rpc_add_wait_queue(q, task, queue_priority);
 
 	BUG_ON(task->tk_callback != NULL);
 	task->tk_callback = action;
@@ -334,11 +340,25 @@
 	 * Protect the queue operations.
 	 */
 	spin_lock_bh(&q->lock);
-	__rpc_sleep_on(q, task, action);
+	__rpc_sleep_on_priority(q, task, action, task->tk_priority);
 	spin_unlock_bh(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on);
 
+void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
+		rpc_action action, int priority)
+{
+	/* We shouldn't ever put an inactive task to sleep */
+	BUG_ON(!RPC_IS_ACTIVATED(task));
+
+	/*
+	 * Protect the queue operations.
+	 */
+	spin_lock_bh(&q->lock);
+	__rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+	spin_unlock_bh(&q->lock);
+}
+
 /**
  * __rpc_do_wake_up_task - wake up a single rpc_task
  * @queue: wait queue
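
rpc_sleep_on() keeps its old behaviour, putting the task to sleep at its own tk_priority, while the new rpc_sleep_on_priority() lets the caller choose the queue priority explicitly (internally offset by RPC_PRIORITY_LOW). A minimal sketch of a caller, mirroring how the xprt.c hunks below use it; example_wait_on_sending() is hypothetical:

static void example_wait_on_sending(struct rpc_xprt *xprt,
				    struct rpc_task *task, bool retrans)
{
	/* Retransmissions jump the queue; fresh requests wait behind them. */
	int priority = retrans ? RPC_PRIORITY_HIGH : RPC_PRIORITY_NORMAL;

	task->tk_timeout = 0;
	task->tk_status = -EAGAIN;
	rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
}
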
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2b90292..6a69a11 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1252,7 +1252,7 @@
 	}
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Process a backchannel RPC request that arrived over an existing
  * outbound connection
@@ -1300,8 +1300,8 @@
 		return 0;
 	}
 }
-EXPORT_SYMBOL(bc_svc_process);
-#endif /* CONFIG_NFS_V4_1 */
+EXPORT_SYMBOL_GPL(bc_svc_process);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * Return (transport-specific) limit on the rpc payload.
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f2cb5b8..767d494 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -68,12 +68,12 @@
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
 					  struct net *, struct sockaddr *,
 					  int, int);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
 					     struct net *, struct sockaddr *,
 					     int, int);
 static void svc_bc_sock_free(struct svc_xprt *xprt);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
@@ -1243,7 +1243,7 @@
 	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
 					     struct net *, struct sockaddr *,
 					     int, int);
@@ -1284,7 +1284,7 @@
 {
 	svc_unreg_xprt_class(&svc_tcp_bc_class);
 }
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
 static void svc_init_bc_xprt_sock(void)
 {
 }
@@ -1292,7 +1292,7 @@
 static void svc_cleanup_bc_xprt_sock(void)
 {
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
@@ -1623,7 +1623,7 @@
 	kfree(svsk);
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Create a back channel svc_xprt which shares the fore channel socket.
  */
@@ -1662,4 +1662,4 @@
 	if (xprt)
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f008c14..277ebd4 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -126,7 +126,7 @@
 	kaddr[buf->page_base + len] = '\0';
 	kunmap_atomic(kaddr, KM_USER0);
 }
-EXPORT_SYMBOL(xdr_terminate_string);
+EXPORT_SYMBOL_GPL(xdr_terminate_string);
 
 void
 xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce5eb68..9b6a4d1 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,7 @@
 /*
  * Local functions
  */
+static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
 static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
 static void	xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
@@ -191,10 +192,10 @@
  * transport connects from colliding with writes.  No congestion control
  * is provided.
  */
-int xprt_reserve_xprt(struct rpc_task *task)
+int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	struct rpc_xprt	*xprt = req->rq_xprt;
+	int priority;
 
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
 		if (task == xprt->snd_task)
@@ -202,8 +203,10 @@
 		goto out_sleep;
 	}
 	xprt->snd_task = task;
-	req->rq_bytes_sent = 0;
-	req->rq_ntrans++;
+	if (req != NULL) {
+		req->rq_bytes_sent = 0;
+		req->rq_ntrans++;
+	}
 
 	return 1;
 
@@ -212,10 +215,13 @@
 			task->tk_pid, xprt);
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	if (req->rq_ntrans)
-		rpc_sleep_on(&xprt->resend, task, NULL);
+	if (req == NULL)
+		priority = RPC_PRIORITY_LOW;
+	else if (!req->rq_ntrans)
+		priority = RPC_PRIORITY_NORMAL;
 	else
-		rpc_sleep_on(&xprt->sending, task, NULL);
+		priority = RPC_PRIORITY_HIGH;
+	rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -239,22 +245,24 @@
  * integrated into the decision of whether a request is allowed to be
  * woken up and given access to the transport.
  */
-int xprt_reserve_xprt_cong(struct rpc_task *task)
+int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	struct rpc_xprt	*xprt = task->tk_xprt;
 	struct rpc_rqst *req = task->tk_rqstp;
+	int priority;
 
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
 		if (task == xprt->snd_task)
 			return 1;
 		goto out_sleep;
 	}
+	if (req == NULL) {
+		xprt->snd_task = task;
+		return 1;
+	}
 	if (__xprt_get_cong(xprt, task)) {
 		xprt->snd_task = task;
-		if (req) {
-			req->rq_bytes_sent = 0;
-			req->rq_ntrans++;
-		}
+		req->rq_bytes_sent = 0;
+		req->rq_ntrans++;
 		return 1;
 	}
 	xprt_clear_locked(xprt);
@@ -262,10 +270,13 @@
 	dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	if (req && req->rq_ntrans)
-		rpc_sleep_on(&xprt->resend, task, NULL);
+	if (req == NULL)
+		priority = RPC_PRIORITY_LOW;
+	else if (!req->rq_ntrans)
+		priority = RPC_PRIORITY_NORMAL;
 	else
-		rpc_sleep_on(&xprt->sending, task, NULL);
+		priority = RPC_PRIORITY_HIGH;
+	rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -275,7 +286,7 @@
 	int retval;
 
 	spin_lock_bh(&xprt->transport_lock);
-	retval = xprt->ops->reserve_xprt(task);
+	retval = xprt->ops->reserve_xprt(xprt, task);
 	spin_unlock_bh(&xprt->transport_lock);
 	return retval;
 }
@@ -288,12 +299,9 @@
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
 
-	task = rpc_wake_up_next(&xprt->resend);
-	if (!task) {
-		task = rpc_wake_up_next(&xprt->sending);
-		if (!task)
-			goto out_unlock;
-	}
+	task = rpc_wake_up_next(&xprt->sending);
+	if (task == NULL)
+		goto out_unlock;
 
 	req = task->tk_rqstp;
 	xprt->snd_task = task;
@@ -310,24 +318,25 @@
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
 	struct rpc_task *task;
+	struct rpc_rqst *req;
 
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
 	if (RPCXPRT_CONGESTED(xprt))
 		goto out_unlock;
-	task = rpc_wake_up_next(&xprt->resend);
-	if (!task) {
-		task = rpc_wake_up_next(&xprt->sending);
-		if (!task)
-			goto out_unlock;
+	task = rpc_wake_up_next(&xprt->sending);
+	if (task == NULL)
+		goto out_unlock;
+
+	req = task->tk_rqstp;
+	if (req == NULL) {
+		xprt->snd_task = task;
+		return;
 	}
 	if (__xprt_get_cong(xprt, task)) {
-		struct rpc_rqst *req = task->tk_rqstp;
 		xprt->snd_task = task;
-		if (req) {
-			req->rq_bytes_sent = 0;
-			req->rq_ntrans++;
-		}
+		req->rq_bytes_sent = 0;
+		req->rq_ntrans++;
 		return;
 	}
 out_unlock:
@@ -852,7 +861,7 @@
 		err = req->rq_reply_bytes_recvd;
 		goto out_unlock;
 	}
-	if (!xprt->ops->reserve_xprt(task))
+	if (!xprt->ops->reserve_xprt(xprt, task))
 		err = -EAGAIN;
 out_unlock:
 	spin_unlock_bh(&xprt->transport_lock);
@@ -928,28 +937,66 @@
 	spin_unlock_bh(&xprt->transport_lock);
 }
 
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+	struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+
+	if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+		goto out;
+	req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+	if (req != NULL)
+		goto out;
+	atomic_dec(&xprt->num_reqs);
+	req = ERR_PTR(-ENOMEM);
+out:
+	return req;
+}
+
+static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+		kfree(req);
+		return true;
+	}
+	return false;
+}
+
 static void xprt_alloc_slot(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
+	struct rpc_rqst *req;
 
-	task->tk_status = 0;
-	if (task->tk_rqstp)
-		return;
 	if (!list_empty(&xprt->free)) {
-		struct rpc_rqst	*req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
-		list_del_init(&req->rq_list);
-		task->tk_rqstp = req;
-		xprt_request_init(task, xprt);
-		return;
+		req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
+		list_del(&req->rq_list);
+		goto out_init_req;
 	}
-	dprintk("RPC:       waiting for request slot\n");
+	req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
+	if (!IS_ERR(req))
+		goto out_init_req;
+	switch (PTR_ERR(req)) {
+	case -ENOMEM:
+		rpc_delay(task, HZ >> 2);
+		dprintk("RPC:       dynamic allocation of request slot "
+				"failed! Retrying\n");
+		break;
+	case -EAGAIN:
+		rpc_sleep_on(&xprt->backlog, task, NULL);
+		dprintk("RPC:       waiting for request slot\n");
+	}
 	task->tk_status = -EAGAIN;
-	task->tk_timeout = 0;
-	rpc_sleep_on(&xprt->backlog, task, NULL);
+	return;
+out_init_req:
+	task->tk_status = 0;
+	task->tk_rqstp = req;
+	xprt_request_init(task, xprt);
 }
 
 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
+	if (xprt_dynamic_free_slot(xprt, req))
+		return;
+
 	memset(req, 0, sizeof(*req));	/* mark unused */
 
 	spin_lock(&xprt->reserve_lock);
@@ -958,25 +1005,49 @@
 	spin_unlock(&xprt->reserve_lock);
 }
 
-struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
+static void xprt_free_all_slots(struct rpc_xprt *xprt)
+{
+	struct rpc_rqst *req;
+	while (!list_empty(&xprt->free)) {
+		req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
+		list_del(&req->rq_list);
+		kfree(req);
+	}
+}
+
+struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
+		unsigned int num_prealloc,
+		unsigned int max_alloc)
 {
 	struct rpc_xprt *xprt;
+	struct rpc_rqst *req;
+	int i;
 
 	xprt = kzalloc(size, GFP_KERNEL);
 	if (xprt == NULL)
 		goto out;
-	atomic_set(&xprt->count, 1);
 
-	xprt->max_reqs = max_req;
-	xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
-	if (xprt->slot == NULL)
+	xprt_init(xprt, net);
+
+	for (i = 0; i < num_prealloc; i++) {
+		req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+		if (!req)
+			break;
+		list_add(&req->rq_list, &xprt->free);
+	}
+	if (i < num_prealloc)
 		goto out_free;
+	if (max_alloc > num_prealloc)
+		xprt->max_reqs = max_alloc;
+	else
+		xprt->max_reqs = num_prealloc;
+	xprt->min_reqs = num_prealloc;
+	atomic_set(&xprt->num_reqs, num_prealloc);
 
-	xprt->xprt_net = get_net(net);
 	return xprt;
 
 out_free:
-	kfree(xprt);
+	xprt_free(xprt);
 out:
 	return NULL;
 }
@@ -985,7 +1056,7 @@
 void xprt_free(struct rpc_xprt *xprt)
 {
 	put_net(xprt->xprt_net);
-	kfree(xprt->slot);
+	xprt_free_all_slots(xprt);
 	kfree(xprt);
 }
 EXPORT_SYMBOL_GPL(xprt_free);
@@ -1001,10 +1072,24 @@
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
-	task->tk_status = -EIO;
+	task->tk_status = 0;
+	if (task->tk_rqstp != NULL)
+		return;
+
+	/* Note: grabbing the xprt_lock_write() here is not strictly needed,
+	 * but ensures that we throttle new slot allocation if the transport
+	 * is congested (e.g. if reconnecting or if we're out of socket
+	 * write buffer space).
+	 */
+	task->tk_timeout = 0;
+	task->tk_status = -EAGAIN;
+	if (!xprt_lock_write(xprt, task))
+		return;
+
 	spin_lock(&xprt->reserve_lock);
 	xprt_alloc_slot(task);
 	spin_unlock(&xprt->reserve_lock);
+	xprt_release_write(xprt, task);
 }
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1021,6 +1106,7 @@
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 
+	INIT_LIST_HEAD(&req->rq_list);
 	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
@@ -1073,6 +1159,34 @@
 		xprt_free_bc_request(req);
 }
 
+static void xprt_init(struct rpc_xprt *xprt, struct net *net)
+{
+	atomic_set(&xprt->count, 1);
+
+	spin_lock_init(&xprt->transport_lock);
+	spin_lock_init(&xprt->reserve_lock);
+
+	INIT_LIST_HEAD(&xprt->free);
+	INIT_LIST_HEAD(&xprt->recv);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+	spin_lock_init(&xprt->bc_pa_lock);
+	INIT_LIST_HEAD(&xprt->bc_pa_list);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+	xprt->last_used = jiffies;
+	xprt->cwnd = RPC_INITCWND;
+	xprt->bind_index = 0;
+
+	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
+	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
+	rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
+
+	xprt_init_xid(xprt);
+
+	xprt->xprt_net = get_net(net);
+}
+
 /**
  * xprt_create_transport - create an RPC transport
  * @args: rpc transport creation arguments
@@ -1081,7 +1195,6 @@
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
 	struct rpc_xprt	*xprt;
-	struct rpc_rqst	*req;
 	struct xprt_class *t;
 
 	spin_lock(&xprt_list_lock);
@@ -1100,46 +1213,17 @@
 	if (IS_ERR(xprt)) {
 		dprintk("RPC:       xprt_create_transport: failed, %ld\n",
 				-PTR_ERR(xprt));
-		return xprt;
+		goto out;
 	}
-	if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
-		/* ->setup returned a pre-initialized xprt: */
-		return xprt;
-
-	spin_lock_init(&xprt->transport_lock);
-	spin_lock_init(&xprt->reserve_lock);
-
-	INIT_LIST_HEAD(&xprt->free);
-	INIT_LIST_HEAD(&xprt->recv);
-#if defined(CONFIG_NFS_V4_1)
-	spin_lock_init(&xprt->bc_pa_lock);
-	INIT_LIST_HEAD(&xprt->bc_pa_list);
-#endif /* CONFIG_NFS_V4_1 */
-
 	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
 	if (xprt_has_timer(xprt))
 		setup_timer(&xprt->timer, xprt_init_autodisconnect,
 			    (unsigned long)xprt);
 	else
 		init_timer(&xprt->timer);
-	xprt->last_used = jiffies;
-	xprt->cwnd = RPC_INITCWND;
-	xprt->bind_index = 0;
-
-	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
-	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
-	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
-	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
-	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
-
-	/* initialize free list */
-	for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
-		list_add(&req->rq_list, &xprt->free);
-
-	xprt_init_xid(xprt);
-
 	dprintk("RPC:       created transport %p with %u slots\n", xprt,
 			xprt->max_reqs);
+out:
 	return xprt;
 }
 
@@ -1157,7 +1241,6 @@
 	rpc_destroy_wait_queue(&xprt->binding);
 	rpc_destroy_wait_queue(&xprt->pending);
 	rpc_destroy_wait_queue(&xprt->sending);
-	rpc_destroy_wait_queue(&xprt->resend);
 	rpc_destroy_wait_queue(&xprt->backlog);
 	cancel_work_sync(&xprt->task_cleanup);
 	/*
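
xprt_dynamic_alloc_slot() and xprt_dynamic_free_slot() above implement a bounded pool: atomic_add_unless() reserves headroom against max_reqs before allocating, and frees only while the count stays above the preallocated min_reqs. The same pattern in isolation, with hypothetical names:

#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/err.h>

struct bounded_pool {			/* hypothetical type */
	atomic_t	count;
	int		floor, ceiling;
};

static void *pool_alloc(struct bounded_pool *p, size_t size, gfp_t gfp)
{
	void *obj;

	/* Reserve headroom atomically; fail fast at the ceiling. */
	if (!atomic_add_unless(&p->count, 1, p->ceiling))
		return ERR_PTR(-EAGAIN);
	obj = kzalloc(size, gfp);
	if (!obj) {
		atomic_dec(&p->count);
		return ERR_PTR(-ENOMEM);
	}
	return obj;
}

static bool pool_free(struct bounded_pool *p, void *obj)
{
	/* Shrink only while above the preallocated floor. */
	if (atomic_add_unless(&p->count, -1, p->floor)) {
		kfree(obj);
		return true;
	}
	return false;	/* caller keeps obj on its free list */
}
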
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 0867070..b446e10 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -283,6 +283,7 @@
 	}
 
 	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+			xprt_rdma_slot_table_entries,
 			xprt_rdma_slot_table_entries);
 	if (xprt == NULL) {
 		dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
@@ -452,9 +453,8 @@
 }
 
 static int
-xprt_rdma_reserve_xprt(struct rpc_task *task)
+xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
 
@@ -466,7 +466,7 @@
 		BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
 	}
 	xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
-	return xprt_reserve_xprt_cong(task);
+	return xprt_reserve_xprt_cong(xprt, task);
 }
 
 /*
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ddf0528..08c5d5a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -109,7 +109,7 @@
  */
 
 /* temporary static scatter/gather max */
-#define RPCRDMA_MAX_DATA_SEGS	(8)	/* max scatter/gather */
+#define RPCRDMA_MAX_DATA_SEGS	(64)	/* max scatter/gather */
 #define RPCRDMA_MAX_SEGS 	(RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
 #define MAX_RPCRDMAHDR	(\
 	/* max supported RPC/RDMA header */ \
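
Raising RPCRDMA_MAX_DATA_SEGS from 8 to 64 raises the largest scatter/gather payload an RPC/RDMA request can describe. Assuming one page per data segment, the arithmetic works out as:

/*
 * max payload ~= RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE (one page per segment)
 *   before:  8 * 4096 =  32768 bytes ( 32 KiB)
 *   after:  64 * 4096 = 262144 bytes (256 KiB)
 * MAX_RPCRDMAHDR grows along with RPCRDMA_MAX_SEGS, since the header
 * must be able to describe head + tail + every data segment.
 */
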
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 72abb73..d7f97ef 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -37,7 +37,7 @@
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
 #include <linux/sunrpc/bc_xprt.h>
 #endif
 
@@ -54,7 +54,8 @@
  * xprtsock tunables
  */
 unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
+unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
 
 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
@@ -75,6 +76,7 @@
 
 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
 
@@ -104,6 +106,15 @@
 		.extra2		= &max_slot_table_size
 	},
 	{
+		.procname	= "tcp_max_slot_table_entries",
+		.data		= &xprt_max_tcp_slot_table_entries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &min_slot_table_size,
+		.extra2		= &max_tcp_slot_table_limit
+	},
+	{
 		.procname	= "min_resvport",
 		.data		= &xprt_min_resvport,
 		.maxlen		= sizeof(unsigned int),
@@ -755,6 +766,8 @@
 	if (task == NULL)
 		goto out_release;
 	req = task->tk_rqstp;
+	if (req == NULL)
+		goto out_release;
 	if (req->rq_bytes_sent == 0)
 		goto out_release;
 	if (req->rq_bytes_sent == req->rq_snd_buf.len)
@@ -1236,7 +1249,7 @@
 	return 0;
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Obtains an rpc_rqst previously allocated and invokes the common
  * tcp read code to read the data.  The result is placed in the callback
@@ -1299,7 +1312,7 @@
 {
 	return xs_tcp_read_reply(xprt, desc);
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * Read data off the transport.  This can be either an RPC_CALL or an
@@ -2489,7 +2502,8 @@
 }
 
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
-				      unsigned int slot_table_size)
+				      unsigned int slot_table_size,
+				      unsigned int max_slot_table_size)
 {
 	struct rpc_xprt *xprt;
 	struct sock_xprt *new;
@@ -2499,7 +2513,8 @@
 		return ERR_PTR(-EBADF);
 	}
 
-	xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+	xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
+			max_slot_table_size);
 	if (xprt == NULL) {
 		dprintk("RPC:       xs_setup_xprt: couldn't allocate "
 				"rpc_xprt\n");
@@ -2541,7 +2556,8 @@
 	struct rpc_xprt *xprt;
 	struct rpc_xprt *ret;
 
-	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+			xprt_max_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2605,7 +2621,8 @@
 	struct sock_xprt *transport;
 	struct rpc_xprt *ret;
 
-	xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
+	xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
+			xprt_udp_slot_table_entries);
 	if (IS_ERR(xprt))
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2681,7 +2698,8 @@
 	struct sock_xprt *transport;
 	struct rpc_xprt *ret;
 
-	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+			xprt_max_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2760,7 +2778,8 @@
 		 */
 		 return args->bc_xprt->xpt_bc_xprt;
 	}
-	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+			xprt_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2947,8 +2966,26 @@
 #define param_check_slot_table_size(name, p) \
 	__param_check(name, p, unsigned int);
 
+static int param_set_max_slot_table_size(const char *val,
+				     const struct kernel_param *kp)
+{
+	return param_set_uint_minmax(val, kp,
+			RPC_MIN_SLOT_TABLE,
+			RPC_MAX_SLOT_TABLE_LIMIT);
+}
+
+static struct kernel_param_ops param_ops_max_slot_table_size = {
+	.set = param_set_max_slot_table_size,
+	.get = param_get_uint,
+};
+
+#define param_check_max_slot_table_size(name, p) \
+	__param_check(name, p, unsigned int);
+
 module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
 		   slot_table_size, 0644);
+module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
+		   max_slot_table_size, 0644);
 module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
 		   slot_table_size, 0644);
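
The new tcp_max_slot_table_entries module parameter is range-checked at write time through a dedicated kernel_param_ops, the same idiom the hunk above uses for max_slot_table_size. A sketch of the idiom with a hypothetical tunable, assuming a param_set_uint_minmax() helper like the one this file already defines:

/* Hypothetical range-checked module parameter (limits made up). */
static unsigned int my_tunable = 16;

static int param_set_my_tunable(const char *val,
				const struct kernel_param *kp)
{
	return param_set_uint_minmax(val, kp, 1, 1024);
}

static struct kernel_param_ops param_ops_my_tunable = {
	.set = param_set_my_tunable,
	.get = param_get_uint,
};

#define param_check_my_tunable(name, p) __param_check(name, p, unsigned int)

module_param_named(my_tunable, my_tunable, my_tunable, 0644);
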
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1ad0f39..02751db 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -903,7 +903,7 @@
 	    initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    !is_world_regdom(last_request->alpha2)) {
 		REG_DBG_PRINT("Ignoring regulatory request %s "
-			      "since the driver requires its own regulaotry "
+			      "since the driver requires its own regulatory "
 			      "domain to be set first",
 			      reg_initiator_name(initiator));
 		return true;
@@ -1125,12 +1125,13 @@
 	enum ieee80211_band band;
 
 	if (ignore_reg_update(wiphy, initiator))
-		goto out;
+		return;
+
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
 			handle_band(wiphy, band, initiator);
 	}
-out:
+
 	reg_process_beacons(wiphy);
 	reg_process_ht_flags(wiphy);
 	if (wiphy->reg_notifier)
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 7312bf9..c1e18ba 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -73,7 +73,6 @@
 		cred = get_task_cred(tracer);
 		tracerp = aa_cred_profile(cred);
 	}
-	rcu_read_unlock();
 
 	/* not ptraced */
 	if (!tracer || unconfined(tracerp))
@@ -82,6 +81,7 @@
 	error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH);
 
 out:
+	rcu_read_unlock();
 	if (cred)
 		put_cred(cred);
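
The AppArmor fix above moves rcu_read_unlock() past the last use of tracerp: the profile returned by aa_cred_profile() is RCU-protected, so dropping the read-side lock before the unconfined() and aa_may_ptrace() calls left a window in which the profile could be freed. A self-contained sketch of the corrected shape (function name is made up):

static int may_ptrace_sketch(struct task_struct *tracer,
			     struct aa_profile *to_profile)
{
	const struct cred *cred = NULL;
	struct aa_profile *tracerp = NULL;
	int error = 0;

	rcu_read_lock();
	if (tracer) {
		cred = get_task_cred(tracer);
		tracerp = aa_cred_profile(cred);
	}
	if (tracer && !unconfined(tracerp))
		error = aa_may_ptrace(tracer, tracerp, to_profile,
				      PTRACE_MODE_ATTACH);
	rcu_read_unlock();	/* tracerp may go stale only after this */
	if (cred)
		put_cred(cred);
	return error;
}
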
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 3d2fd14..3783202 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -127,7 +127,7 @@
 	*inheritable = cred->cap_inheritable;
 	*permitted = cred->cap_permitted;
 
-	if (!unconfined(profile)) {
+	if (!unconfined(profile) && !COMPLAIN_MODE(profile)) {
 		*effective = cap_intersect(*effective, profile->caps.allow);
 		*permitted = cap_intersect(*permitted, profile->caps.allow);
 	}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 39d66dc..26b46ff 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -86,7 +86,7 @@
 				  struct inode *inode,
 				  struct file *file)
 {
-	mode_t mode = file->f_mode;
+	fmode_t mode = file->f_mode;
 
 	mutex_lock(&iint->mutex);
 	if (mode & FMODE_WRITE &&
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 1bf090a..b34cc6e 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -14,7 +14,7 @@
 	user_defined.o
 
 obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
-obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted.o
+obj-$(CONFIG_ENCRYPTED_KEYS) += ecryptfs_format.o encrypted.o
 obj-$(CONFIG_KEYS_COMPAT) += compat.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/security/keys/ecryptfs_format.c b/security/keys/ecryptfs_format.c
new file mode 100644
index 0000000..6daa3b6
--- /dev/null
+++ b/security/keys/ecryptfs_format.c
@@ -0,0 +1,81 @@
+/*
+ * ecryptfs_format.c: helper functions for the encrypted key type
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ * Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include "ecryptfs_format.h"
+
+u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok)
+{
+	return auth_tok->token.password.session_key_encryption_key;
+}
+EXPORT_SYMBOL(ecryptfs_get_auth_tok_key);
+
+/*
+ * ecryptfs_get_versions()
+ *
+ * Source code taken from the software 'ecryptfs-utils' version 83.
+ *
+ */
+void ecryptfs_get_versions(int *major, int *minor, int *file_version)
+{
+	*major = ECRYPTFS_VERSION_MAJOR;
+	*minor = ECRYPTFS_VERSION_MINOR;
+	if (file_version)
+		*file_version = ECRYPTFS_SUPPORTED_FILE_VERSION;
+}
+EXPORT_SYMBOL(ecryptfs_get_versions);
+
+/*
+ * ecryptfs_fill_auth_tok - fill the ecryptfs_auth_tok structure
+ *
+ * Fill the ecryptfs_auth_tok structure with required ecryptfs data.
+ * The source code is inspired by the original function generate_payload()
+ * shipped with the software 'ecryptfs-utils' version 83.
+ *
+ */
+int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok,
+			   const char *key_desc)
+{
+	int major, minor;
+
+	ecryptfs_get_versions(&major, &minor, NULL);
+	auth_tok->version = (((uint16_t)(major << 8) & 0xFF00)
+			     | ((uint16_t)minor & 0x00FF));
+	auth_tok->token_type = ECRYPTFS_PASSWORD;
+	strncpy((char *)auth_tok->token.password.signature, key_desc,
+		ECRYPTFS_PASSWORD_SIG_SIZE);
+	auth_tok->token.password.session_key_encryption_key_bytes =
+		ECRYPTFS_MAX_KEY_BYTES;
+	/*
+	 * Removed auth_tok->token.password.salt and
+	 * auth_tok->token.password.session_key_encryption_key
+	 * initialization from the original code
+	 */
+	/* TODO: Make the hash parameterizable via policy */
+	auth_tok->token.password.flags |=
+		ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET;
+	/* The kernel code will encrypt the session key. */
+	auth_tok->session_key.encrypted_key[0] = 0;
+	auth_tok->session_key.encrypted_key_size = 0;
+	/* Default; subject to change by kernel eCryptfs */
+	auth_tok->token.password.hash_algo = PGP_DIGEST_ALGO_SHA512;
+	auth_tok->token.password.flags &= ~(ECRYPTFS_PERSISTENT_PASSWORD);
+	return 0;
+}
+EXPORT_SYMBOL(ecryptfs_fill_auth_tok);
+
+MODULE_LICENSE("GPL");
diff --git a/security/keys/ecryptfs_format.h b/security/keys/ecryptfs_format.h
new file mode 100644
index 0000000..40294de
--- /dev/null
+++ b/security/keys/ecryptfs_format.h
@@ -0,0 +1,30 @@
+/*
+ * ecryptfs_format.h: helper functions for the encrypted key type
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ * Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#ifndef __KEYS_ECRYPTFS_H
+#define __KEYS_ECRYPTFS_H
+
+#include <linux/ecryptfs.h>
+
+#define PGP_DIGEST_ALGO_SHA512   10
+
+u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok);
+void ecryptfs_get_versions(int *major, int *minor, int *file_version);
+int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok,
+			   const char *key_desc);
+
+#endif /* __KEYS_ECRYPTFS_H */
diff --git a/security/keys/encrypted.c b/security/keys/encrypted.c
index b1cba5b..e7eca9e 100644
--- a/security/keys/encrypted.c
+++ b/security/keys/encrypted.c
@@ -1,8 +1,11 @@
 /*
  * Copyright (C) 2010 IBM Corporation
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
  *
- * Author:
+ * Authors:
  * Mimi Zohar <zohar@us.ibm.com>
+ * Roberto Sassu <roberto.sassu@polito.it>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,22 +29,27 @@
 #include <linux/rcupdate.h>
 #include <linux/scatterlist.h>
 #include <linux/crypto.h>
+#include <linux/ctype.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/aes.h>
 
 #include "encrypted.h"
+#include "ecryptfs_format.h"
 
 static const char KEY_TRUSTED_PREFIX[] = "trusted:";
 static const char KEY_USER_PREFIX[] = "user:";
 static const char hash_alg[] = "sha256";
 static const char hmac_alg[] = "hmac(sha256)";
 static const char blkcipher_alg[] = "cbc(aes)";
+static const char key_format_default[] = "default";
+static const char key_format_ecryptfs[] = "ecryptfs";
 static unsigned int ivsize;
 static int blksize;
 
 #define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1)
 #define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1)
+#define KEY_ECRYPTFS_DESC_LEN 16
 #define HASH_SIZE SHA256_DIGEST_SIZE
 #define MAX_DATA_SIZE 4096
 #define MIN_DATA_SIZE  20
@@ -58,6 +66,16 @@
 	Opt_err = -1, Opt_new, Opt_load, Opt_update
 };
 
+enum {
+	Opt_error = -1, Opt_default, Opt_ecryptfs
+};
+
+static const match_table_t key_format_tokens = {
+	{Opt_default, "default"},
+	{Opt_ecryptfs, "ecryptfs"},
+	{Opt_error, NULL}
+};
+
 static const match_table_t key_tokens = {
 	{Opt_new, "new"},
 	{Opt_load, "load"},
@@ -82,9 +100,37 @@
 }
 
 /*
+ * valid_ecryptfs_desc - verify the description of a new/loaded encrypted key
+ *
+ * The description of an encrypted key with format 'ecryptfs' must contain
+ * exactly 16 hexadecimal characters.
+ *
+ */
+static int valid_ecryptfs_desc(const char *ecryptfs_desc)
+{
+	int i;
+
+	if (strlen(ecryptfs_desc) != KEY_ECRYPTFS_DESC_LEN) {
+		pr_err("encrypted_key: key description must be %d hexadecimal "
+		       "characters long\n", KEY_ECRYPTFS_DESC_LEN);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < KEY_ECRYPTFS_DESC_LEN; i++) {
+		if (!isxdigit(ecryptfs_desc[i])) {
+			pr_err("encrypted_key: key description must contain "
+			       "only hexadecimal characters\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
  * valid_master_desc - verify the 'key-type:desc' of a new/updated master-key
  *
- * key-type:= "trusted:" | "encrypted:"
+ * key-type:= "trusted:" | "user:"
  * desc:= master-key description
  *
  * Verify that 'key-type' is valid and that 'desc' exists. On key update,
@@ -118,8 +164,9 @@
  * datablob_parse - parse the keyctl data
  *
  * datablob format:
- * new <master-key name> <decrypted data length>
- * load <master-key name> <decrypted data length> <encrypted iv + data>
+ * new [<format>] <master-key name> <decrypted data length>
+ * load [<format>] <master-key name> <decrypted data length>
+ *     <encrypted iv + data>
  * update <new-master-key name>
  *
  * Tokenizes a copy of the keyctl data, returning a pointer to each token,
@@ -127,52 +174,95 @@
  *
  * On success returns 0, otherwise -EINVAL.
  */
-static int datablob_parse(char *datablob, char **master_desc,
-			  char **decrypted_datalen, char **hex_encoded_iv)
+static int datablob_parse(char *datablob, const char **format,
+			  char **master_desc, char **decrypted_datalen,
+			  char **hex_encoded_iv)
 {
 	substring_t args[MAX_OPT_ARGS];
 	int ret = -EINVAL;
 	int key_cmd;
-	char *p;
+	int key_format;
+	char *p, *keyword;
 
-	p = strsep(&datablob, " \t");
-	if (!p)
+	keyword = strsep(&datablob, " \t");
+	if (!keyword) {
+		pr_info("encrypted_key: insufficient parameters specified\n");
 		return ret;
-	key_cmd = match_token(p, key_tokens, args);
+	}
+	key_cmd = match_token(keyword, key_tokens, args);
 
-	*master_desc = strsep(&datablob, " \t");
-	if (!*master_desc)
-		goto out;
+	/* Get optional format: default | ecryptfs */
+	p = strsep(&datablob, " \t");
+	if (!p) {
+		pr_err("encrypted_key: insufficient parameters specified\n");
+		return ret;
+	}
 
-	if (valid_master_desc(*master_desc, NULL) < 0)
+	key_format = match_token(p, key_format_tokens, args);
+	switch (key_format) {
+	case Opt_ecryptfs:
+	case Opt_default:
+		*format = p;
+		*master_desc = strsep(&datablob, " \t");
+		break;
+	case Opt_error:
+		*master_desc = p;
+		break;
+	}
+
+	if (!*master_desc) {
+		pr_info("encrypted_key: master key parameter is missing\n");
 		goto out;
+	}
+
+	if (valid_master_desc(*master_desc, NULL) < 0) {
+		pr_info("encrypted_key: master key parameter \'%s\' "
+			"is invalid\n", *master_desc);
+		goto out;
+	}
 
 	if (decrypted_datalen) {
 		*decrypted_datalen = strsep(&datablob, " \t");
-		if (!*decrypted_datalen)
+		if (!*decrypted_datalen) {
+			pr_info("encrypted_key: keylen parameter is missing\n");
 			goto out;
+		}
 	}
 
 	switch (key_cmd) {
 	case Opt_new:
-		if (!decrypted_datalen)
+		if (!decrypted_datalen) {
+			pr_info("encrypted_key: keyword \'%s\' not allowed "
+				"when called from .update method\n", keyword);
 			break;
+		}
 		ret = 0;
 		break;
 	case Opt_load:
-		if (!decrypted_datalen)
+		if (!decrypted_datalen) {
+			pr_info("encrypted_key: keyword \'%s\' not allowed "
+				"when called from .update method\n", keyword);
 			break;
+		}
 		*hex_encoded_iv = strsep(&datablob, " \t");
-		if (!*hex_encoded_iv)
+		if (!*hex_encoded_iv) {
+			pr_info("encrypted_key: hex blob is missing\n");
 			break;
+		}
 		ret = 0;
 		break;
 	case Opt_update:
-		if (decrypted_datalen)
+		if (decrypted_datalen) {
+			pr_info("encrypted_key: keyword \'%s\' not allowed "
+				"when called from .instantiate method\n",
+				keyword);
 			break;
+		}
 		ret = 0;
 		break;
 	case Opt_err:
+		pr_info("encrypted_key: keyword \'%s\' not recognized\n",
+			keyword);
 		break;
 	}
 out:
@@ -197,8 +287,8 @@
 	ascii_buf[asciiblob_len] = '\0';
 
 	/* copy datablob master_desc and datalen strings */
-	len = sprintf(ascii_buf, "%s %s ", epayload->master_desc,
-		      epayload->datalen);
+	len = sprintf(ascii_buf, "%s %s %s ", epayload->format,
+		      epayload->master_desc, epayload->datalen);
 
 	/* convert the hex encoded iv, encrypted-data and HMAC to ascii */
 	bufp = &ascii_buf[len];
@@ -378,11 +468,13 @@
 	} else
 		goto out;
 
-	if (IS_ERR(mkey))
+	if (IS_ERR(mkey)) {
 		pr_info("encrypted_key: key %s not found",
 			epayload->master_desc);
-	if (mkey)
-		dump_master_key(*master_key, *master_keylen);
+		goto out;
+	}
+
+	dump_master_key(*master_key, *master_keylen);
 out:
 	return mkey;
 }
@@ -439,9 +531,9 @@
 	if (ret < 0)
 		goto out;
 
-	digest = epayload->master_desc + epayload->datablob_len;
+	digest = epayload->format + epayload->datablob_len;
 	ret = calc_hmac(digest, derived_key, sizeof derived_key,
-			epayload->master_desc, epayload->datablob_len);
+			epayload->format, epayload->datablob_len);
 	if (!ret)
 		dump_hmac(NULL, digest, HASH_SIZE);
 out:
@@ -450,26 +542,35 @@
 
 /* verify HMAC before decrypting encrypted key */
 static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
-				const u8 *master_key, size_t master_keylen)
+				const u8 *format, const u8 *master_key,
+				size_t master_keylen)
 {
 	u8 derived_key[HASH_SIZE];
 	u8 digest[HASH_SIZE];
 	int ret;
+	char *p;
+	unsigned short len;
 
 	ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen);
 	if (ret < 0)
 		goto out;
 
-	ret = calc_hmac(digest, derived_key, sizeof derived_key,
-			epayload->master_desc, epayload->datablob_len);
+	len = epayload->datablob_len;
+	if (!format) {
+		p = epayload->master_desc;
+		len -= strlen(epayload->format) + 1;
+	} else
+		p = epayload->format;
+
+	ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len);
 	if (ret < 0)
 		goto out;
-	ret = memcmp(digest, epayload->master_desc + epayload->datablob_len,
+	ret = memcmp(digest, epayload->format + epayload->datablob_len,
 		     sizeof digest);
 	if (ret) {
 		ret = -EINVAL;
 		dump_hmac("datablob",
-			  epayload->master_desc + epayload->datablob_len,
+			  epayload->format + epayload->datablob_len,
 			  HASH_SIZE);
 		dump_hmac("calc", digest, HASH_SIZE);
 	}
@@ -514,13 +615,16 @@
 
 /* Allocate memory for decrypted key and datablob. */
 static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
+							 const char *format,
 							 const char *master_desc,
 							 const char *datalen)
 {
 	struct encrypted_key_payload *epayload = NULL;
 	unsigned short datablob_len;
 	unsigned short decrypted_datalen;
+	unsigned short payload_datalen;
 	unsigned int encrypted_datalen;
+	unsigned int format_len;
 	long dlen;
 	int ret;
 
@@ -528,29 +632,43 @@
 	if (ret < 0 || dlen < MIN_DATA_SIZE || dlen > MAX_DATA_SIZE)
 		return ERR_PTR(-EINVAL);
 
+	format_len = (!format) ? strlen(key_format_default) : strlen(format);
 	decrypted_datalen = dlen;
+	payload_datalen = decrypted_datalen;
+	if (format && !strcmp(format, key_format_ecryptfs)) {
+		if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
+			pr_err("encrypted_key: keylen for the ecryptfs format "
+			       "must be equal to %d bytes\n",
+			       ECRYPTFS_MAX_KEY_BYTES);
+			return ERR_PTR(-EINVAL);
+		}
+		decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES;
+		payload_datalen = sizeof(struct ecryptfs_auth_tok);
+	}
+
 	encrypted_datalen = roundup(decrypted_datalen, blksize);
 
-	datablob_len = strlen(master_desc) + 1 + strlen(datalen) + 1
-	    + ivsize + 1 + encrypted_datalen;
+	datablob_len = format_len + 1 + strlen(master_desc) + 1
+	    + strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen;
 
-	ret = key_payload_reserve(key, decrypted_datalen + datablob_len
+	ret = key_payload_reserve(key, payload_datalen + datablob_len
 				  + HASH_SIZE + 1);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-	epayload = kzalloc(sizeof(*epayload) + decrypted_datalen +
+	epayload = kzalloc(sizeof(*epayload) + payload_datalen +
 			   datablob_len + HASH_SIZE + 1, GFP_KERNEL);
 	if (!epayload)
 		return ERR_PTR(-ENOMEM);
 
+	epayload->payload_datalen = payload_datalen;
 	epayload->decrypted_datalen = decrypted_datalen;
 	epayload->datablob_len = datablob_len;
 	return epayload;
 }
 
 static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
-				 const char *hex_encoded_iv)
+				 const char *format, const char *hex_encoded_iv)
 {
 	struct key *mkey;
 	u8 derived_key[HASH_SIZE];
@@ -571,14 +689,14 @@
 	hex2bin(epayload->iv, hex_encoded_iv, ivsize);
 	hex2bin(epayload->encrypted_data, hex_encoded_data, encrypted_datalen);
 
-	hmac = epayload->master_desc + epayload->datablob_len;
+	hmac = epayload->format + epayload->datablob_len;
 	hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2), HASH_SIZE);
 
 	mkey = request_master_key(epayload, &master_key, &master_keylen);
 	if (IS_ERR(mkey))
 		return PTR_ERR(mkey);
 
-	ret = datablob_hmac_verify(epayload, master_key, master_keylen);
+	ret = datablob_hmac_verify(epayload, format, master_key, master_keylen);
 	if (ret < 0) {
 		pr_err("encrypted_key: bad hmac (%d)\n", ret);
 		goto out;
@@ -598,13 +716,28 @@
 }
 
 static void __ekey_init(struct encrypted_key_payload *epayload,
-			const char *master_desc, const char *datalen)
+			const char *format, const char *master_desc,
+			const char *datalen)
 {
-	epayload->master_desc = epayload->decrypted_data
-	    + epayload->decrypted_datalen;
+	unsigned int format_len;
+
+	format_len = (!format) ? strlen(key_format_default) : strlen(format);
+	epayload->format = epayload->payload_data + epayload->payload_datalen;
+	epayload->master_desc = epayload->format + format_len + 1;
 	epayload->datalen = epayload->master_desc + strlen(master_desc) + 1;
 	epayload->iv = epayload->datalen + strlen(datalen) + 1;
 	epayload->encrypted_data = epayload->iv + ivsize + 1;
+	epayload->decrypted_data = epayload->payload_data;
+
+	if (!format)
+		memcpy(epayload->format, key_format_default, format_len);
+	else {
+		if (!strcmp(format, key_format_ecryptfs))
+			epayload->decrypted_data =
+				ecryptfs_get_auth_tok_key((struct ecryptfs_auth_tok *)epayload->payload_data);
+
+		memcpy(epayload->format, format, format_len);
+	}
 
 	memcpy(epayload->master_desc, master_desc, strlen(master_desc));
 	memcpy(epayload->datalen, datalen, strlen(datalen));
@@ -617,19 +750,29 @@
  * itself.  For an old key, decrypt the hex encoded data.
  */
 static int encrypted_init(struct encrypted_key_payload *epayload,
+			  const char *key_desc, const char *format,
 			  const char *master_desc, const char *datalen,
 			  const char *hex_encoded_iv)
 {
 	int ret = 0;
 
-	__ekey_init(epayload, master_desc, datalen);
+	if (format && !strcmp(format, key_format_ecryptfs)) {
+		ret = valid_ecryptfs_desc(key_desc);
+		if (ret < 0)
+			return ret;
+
+		ecryptfs_fill_auth_tok((struct ecryptfs_auth_tok *)epayload->payload_data,
+				       key_desc);
+	}
+
+	__ekey_init(epayload, format, master_desc, datalen);
 	if (!hex_encoded_iv) {
 		get_random_bytes(epayload->iv, ivsize);
 
 		get_random_bytes(epayload->decrypted_data,
 				 epayload->decrypted_datalen);
 	} else
-		ret = encrypted_key_decrypt(epayload, hex_encoded_iv);
+		ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv);
 	return ret;
 }
 
@@ -646,6 +789,7 @@
 {
 	struct encrypted_key_payload *epayload = NULL;
 	char *datablob = NULL;
+	const char *format = NULL;
 	char *master_desc = NULL;
 	char *decrypted_datalen = NULL;
 	char *hex_encoded_iv = NULL;
@@ -659,18 +803,19 @@
 		return -ENOMEM;
 	datablob[datalen] = 0;
 	memcpy(datablob, data, datalen);
-	ret = datablob_parse(datablob, &master_desc, &decrypted_datalen,
-			     &hex_encoded_iv);
+	ret = datablob_parse(datablob, &format, &master_desc,
+			     &decrypted_datalen, &hex_encoded_iv);
 	if (ret < 0)
 		goto out;
 
-	epayload = encrypted_key_alloc(key, master_desc, decrypted_datalen);
+	epayload = encrypted_key_alloc(key, format, master_desc,
+				       decrypted_datalen);
 	if (IS_ERR(epayload)) {
 		ret = PTR_ERR(epayload);
 		goto out;
 	}
-	ret = encrypted_init(epayload, master_desc, decrypted_datalen,
-			     hex_encoded_iv);
+	ret = encrypted_init(epayload, key->description, format, master_desc,
+			     decrypted_datalen, hex_encoded_iv);
 	if (ret < 0) {
 		kfree(epayload);
 		goto out;
@@ -706,6 +851,7 @@
 	struct encrypted_key_payload *new_epayload;
 	char *buf;
 	char *new_master_desc = NULL;
+	const char *format = NULL;
 	int ret = 0;
 
 	if (datalen <= 0 || datalen > 32767 || !data)
@@ -717,7 +863,7 @@
 
 	buf[datalen] = 0;
 	memcpy(buf, data, datalen);
-	ret = datablob_parse(buf, &new_master_desc, NULL, NULL);
+	ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
 	if (ret < 0)
 		goto out;
 
@@ -725,18 +871,19 @@
 	if (ret < 0)
 		goto out;
 
-	new_epayload = encrypted_key_alloc(key, new_master_desc,
-					   epayload->datalen);
+	new_epayload = encrypted_key_alloc(key, epayload->format,
+					   new_master_desc, epayload->datalen);
 	if (IS_ERR(new_epayload)) {
 		ret = PTR_ERR(new_epayload);
 		goto out;
 	}
 
-	__ekey_init(new_epayload, new_master_desc, epayload->datalen);
+	__ekey_init(new_epayload, epayload->format, new_master_desc,
+		    epayload->datalen);
 
 	memcpy(new_epayload->iv, epayload->iv, ivsize);
-	memcpy(new_epayload->decrypted_data, epayload->decrypted_data,
-	       epayload->decrypted_datalen);
+	memcpy(new_epayload->payload_data, epayload->payload_data,
+	       epayload->payload_datalen);
 
 	rcu_assign_pointer(key->payload.data, new_epayload);
 	call_rcu(&epayload->rcu, encrypted_rcu_free);
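
With the optional format token, the request syntax becomes "new [<format>] <master-key> <keylen>", defaulting to "default" when the token is omitted; ecryptfs-format keys additionally require a 16-hex-character description and a keylen of exactly ECRYPTFS_MAX_KEY_BYTES (64). A hedged userspace sketch using the keyutils add_key() wrapper; the master-key name and the description are made up:

#include <keyutils.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* ecryptfs-format keys need exactly 16 hexadecimal characters */
	const char *desc = "0123456789abcdef";
	/* "new <format> <master-key> <keylen>"; trusted:kmk is made up */
	const char *payload = "new ecryptfs trusted:kmk 64";
	key_serial_t key = add_key("encrypted", desc, payload,
				   strlen(payload), KEY_SPEC_USER_KEYRING);

	if (key < 0) {
		perror("add_key");
		return 1;
	}
	printf("encrypted key id: %d\n", key);
	return 0;
}
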
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 6cff375..60d4e3f 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -251,6 +251,8 @@
 
 	if (IS_ERR(authkey_ref)) {
 		authkey = ERR_CAST(authkey_ref);
+		if (authkey == ERR_PTR(-EAGAIN))
+			authkey = ERR_PTR(-ENOKEY);
 		goto error;
 	}
 
diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig
index c8f3857..7c7f8c1 100644
--- a/security/tomoyo/Kconfig
+++ b/security/tomoyo/Kconfig
@@ -9,3 +9,64 @@
 	  Required userspace tools and further information may be
 	  found at <http://tomoyo.sourceforge.jp/>.
 	  If you are unsure how to answer this question, answer N.
+
+config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY
+	int "Default maximal count for learning mode"
+	default 2048
+	range 0 2147483647
+	depends on SECURITY_TOMOYO
+	help
+	  This is the default limit on the number of ACL entries that are
+	  automatically appended to the policy in "learning mode".
+	  Some programs access thousands of objects, so running
+	  such programs in "learning mode" dulls the system response
+	  and consumes much memory.
+	  This is the safeguard against such programs.
+
+config SECURITY_TOMOYO_MAX_AUDIT_LOG
+	int "Default maximal count for audit log"
+	default 1024
+	range 0 2147483647
+	depends on SECURITY_TOMOYO
+	help
+	  This is the default limit on the number of audit-log entries
+	  that the kernel can hold in memory.
+	  You can read the log via /sys/kernel/security/tomoyo/audit.
+	  If you don't need audit logs, you may set this value to 0.
+
+config SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+	bool "Activate without calling userspace policy loader."
+	default n
+	depends on SECURITY_TOMOYO
+	---help---
+	  Say Y here if you want to activate access control as soon as the
+	  built-in policy has been loaded. This option is useful for systems
+	  where operations which can lead to hijacking of the boot sequence
+	  must happen before the full policy can be loaded. For example, you
+	  can activate access control immediately after loading the fixed part
+	  of the policy, which allows only the operations needed for mounting
+	  the partition that contains the variant part of the policy,
+	  verifying it (e.g. running a GPG check), and loading it. Since
+	  enforcing mode can then be used from the beginning, the window for
+	  hijacking the boot sequence is reduced.
+
+config SECURITY_TOMOYO_POLICY_LOADER
+	string "Location of userspace policy loader"
+	default "/sbin/tomoyo-init"
+	depends on SECURITY_TOMOYO
+	depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+	---help---
+	  This is the default pathname of the policy loader, which is called
+	  before activation. You can override this setting via the
+	  TOMOYO_loader= kernel command line option.
+
+config SECURITY_TOMOYO_ACTIVATION_TRIGGER
+	string "Trigger for calling userspace policy loader"
+	default "/sbin/init"
+	depends on SECURITY_TOMOYO
+	depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+	---help---
+	  This is the default pathname of the activation trigger.
+	  You can override this setting via the TOMOYO_trigger= kernel command
+	  line option. For example, if you pass the init=/bin/systemd option,
+	  you may also want to pass the TOMOYO_trigger=/bin/systemd option.
diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile
index 91640e9..95278b7 100644
--- a/security/tomoyo/Makefile
+++ b/security/tomoyo/Makefile
@@ -1 +1,48 @@
-obj-y = common.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o
+obj-y = audit.o common.o condition.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o
+
+$(obj)/policy/profile.conf:
+	@mkdir -p $(obj)/policy/
+	@echo Creating an empty policy/profile.conf
+	@touch $@
+
+$(obj)/policy/exception_policy.conf:
+	@mkdir -p $(obj)/policy/
+	@echo Creating a default policy/exception_policy.conf
+	@echo initialize_domain /sbin/modprobe from any >> $@
+	@echo initialize_domain /sbin/hotplug from any >> $@
+
+$(obj)/policy/domain_policy.conf:
+	@mkdir -p $(obj)/policy/
+	@echo Creating an empty policy/domain_policy.conf
+	@touch $@
+
+$(obj)/policy/manager.conf:
+	@mkdir -p $(obj)/policy/
+	@echo Creating an empty policy/manager.conf
+	@touch $@
+
+$(obj)/policy/stat.conf:
+	@mkdir -p $(obj)/policy/
+	@echo Creating an empty policy/stat.conf
+	@touch $@
+
+$(obj)/builtin-policy.h: $(obj)/policy/profile.conf $(obj)/policy/exception_policy.conf $(obj)/policy/domain_policy.conf $(obj)/policy/manager.conf $(obj)/policy/stat.conf
+	@echo Generating built-in policy for TOMOYO 2.4.x.
+	@echo "static char tomoyo_builtin_profile[] __initdata =" > $@.tmp
+	@sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/profile.conf >> $@.tmp
+	@echo "\"\";" >> $@.tmp
+	@echo "static char tomoyo_builtin_exception_policy[] __initdata =" >> $@.tmp
+	@sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/exception_policy.conf >> $@.tmp
+	@echo "\"\";" >> $@.tmp
+	@echo "static char tomoyo_builtin_domain_policy[] __initdata =" >> $@.tmp
+	@sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/domain_policy.conf >> $@.tmp
+	@echo "\"\";" >> $@.tmp
+	@echo "static char tomoyo_builtin_manager[] __initdata =" >> $@.tmp
+	@sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/manager.conf >> $@.tmp
+	@echo "\"\";" >> $@.tmp
+	@echo "static char tomoyo_builtin_stat[] __initdata =" >> $@.tmp
+	@sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/stat.conf >> $@.tmp
+	@echo "\"\";" >> $@.tmp
+	@mv $@.tmp $@
+
+$(obj)/common.o: $(obj)/builtin-policy.h
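
For reference, with the default inputs created above (an empty
profile.conf plus the two-line exception_policy.conf), the sed pipeline
should generate a builtin-policy.h along these lines (two of the five
arrays shown):

	static char tomoyo_builtin_profile[] __initdata =
	"";
	static char tomoyo_builtin_exception_policy[] __initdata =
	"initialize_domain /sbin/modprobe from any\n"
	"initialize_domain /sbin/hotplug from any\n"
	"";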
diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
new file mode 100644
index 0000000..5dbb1f7
--- /dev/null
+++ b/security/tomoyo/audit.c
@@ -0,0 +1,456 @@
+/*
+ * security/tomoyo/audit.c
+ *
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
+ */
+
+#include "common.h"
+#include <linux/slab.h>
+
+/**
+ * tomoyo_print_bprm - Print "struct linux_binprm" for auditing.
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ * @dump: Pointer to "struct tomoyo_page_dump".
+ *
+ * Returns the contents of @bprm on success, NULL otherwise.
+ *
+ * This function uses kzalloc(), so the caller must kfree() the returned
+ * buffer unless this function returned NULL.
+ */
+static char *tomoyo_print_bprm(struct linux_binprm *bprm,
+			       struct tomoyo_page_dump *dump)
+{
+	static const int tomoyo_buffer_len = 4096 * 2;
+	char *buffer = kzalloc(tomoyo_buffer_len, GFP_NOFS);
+	char *cp;
+	char *last_start;
+	int len;
+	unsigned long pos = bprm->p;
+	int offset = pos % PAGE_SIZE;
+	int argv_count = bprm->argc;
+	int envp_count = bprm->envc;
+	bool truncated = false;
+	if (!buffer)
+		return NULL;
+	len = snprintf(buffer, tomoyo_buffer_len - 1, "argv[]={ ");
+	cp = buffer + len;
+	if (!argv_count) {
+		memmove(cp, "} envp[]={ ", 11);
+		cp += 11;
+	}
+	last_start = cp;
+	while (argv_count || envp_count) {
+		if (!tomoyo_dump_page(bprm, pos, dump))
+			goto out;
+		pos += PAGE_SIZE - offset;
+		/* Read. */
+		while (offset < PAGE_SIZE) {
+			const char *kaddr = dump->data;
+			const unsigned char c = kaddr[offset++];
+			if (cp == last_start)
+				*cp++ = '"';
+			if (cp >= buffer + tomoyo_buffer_len - 32) {
+				/* Reserve some room for "..." string. */
+				truncated = true;
+			} else if (c == '\\') {
+				*cp++ = '\\';
+				*cp++ = '\\';
+			} else if (c > ' ' && c < 127) {
+				*cp++ = c;
+			} else if (!c) {
+				*cp++ = '"';
+				*cp++ = ' ';
+				last_start = cp;
+			} else {
+				*cp++ = '\\';
+				*cp++ = (c >> 6) + '0';
+				*cp++ = ((c >> 3) & 7) + '0';
+				*cp++ = (c & 7) + '0';
+			}
+			if (c)
+				continue;
+			if (argv_count) {
+				if (--argv_count == 0) {
+					if (truncated) {
+						cp = last_start;
+						memmove(cp, "... ", 4);
+						cp += 4;
+					}
+					memmove(cp, "} envp[]={ ", 11);
+					cp += 11;
+					last_start = cp;
+					truncated = false;
+				}
+			} else if (envp_count) {
+				if (--envp_count == 0) {
+					if (truncated) {
+						cp = last_start;
+						memmove(cp, "... ", 4);
+						cp += 4;
+					}
+				}
+			}
+			if (!argv_count && !envp_count)
+				break;
+		}
+		offset = 0;
+	}
+	*cp++ = '}';
+	*cp = '\0';
+	return buffer;
+out:
+	snprintf(buffer, tomoyo_buffer_len - 1,
+		 "argv[]={ ... } envp[]={ ... }");
+	return buffer;
+}
+
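
The escaping rule used by the read loop above is easier to see in
isolation: backslashes are doubled, printable non-space characters are
copied verbatim, and everything else becomes a three-digit octal escape.
A stand-alone sketch of the same rule (escape_arg() is a hypothetical
name, not part of this patch):

	#include <stdio.h>

	/* Escape one argv/envp string the way tomoyo_print_bprm() does. */
	static void escape_arg(const unsigned char *s, FILE *out)
	{
		unsigned char c;

		while ((c = *s++) != '\0') {
			if (c == '\\')
				fputs("\\\\", out);
			else if (c > ' ' && c < 127)
				fputc(c, out);
			else
				fprintf(out, "\\%c%c%c", (c >> 6) + '0',
					((c >> 3) & 7) + '0', (c & 7) + '0');
		}
	}

For example, escape_arg() renders the string "a b" as a\040b, which is
what tomoyo_print_bprm() writes between the double quotes.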
+/**
+ * tomoyo_filetype - Get string representation of file type.
+ *
+ * @mode: Mode value for stat().
+ *
+ * Returns file type string.
+ */
+static inline const char *tomoyo_filetype(const mode_t mode)
+{
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+	case 0:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FILE];
+	case S_IFDIR:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_DIRECTORY];
+	case S_IFLNK:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SYMLINK];
+	case S_IFIFO:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FIFO];
+	case S_IFSOCK:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SOCKET];
+	case S_IFBLK:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_BLOCK_DEV];
+	case S_IFCHR:
+		return tomoyo_condition_keyword[TOMOYO_TYPE_IS_CHAR_DEV];
+	}
+	return "unknown"; /* This should not happen. */
+}
+
+/**
+ * tomoyo_print_header - Get header line of audit log.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ *
+ * Returns string representation.
+ *
+ * This function uses kmalloc(), so the caller must kfree() the returned
+ * buffer unless this function returned NULL.
+ */
+static char *tomoyo_print_header(struct tomoyo_request_info *r)
+{
+	struct tomoyo_time stamp;
+	const pid_t gpid = task_pid_nr(current);
+	struct tomoyo_obj_info *obj = r->obj;
+	static const int tomoyo_buffer_len = 4096;
+	char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS);
+	int pos;
+	u8 i;
+	if (!buffer)
+		return NULL;
+	{
+		struct timeval tv;
+		do_gettimeofday(&tv);
+		tomoyo_convert_time(tv.tv_sec, &stamp);
+	}
+	pos = snprintf(buffer, tomoyo_buffer_len - 1,
+		       "#%04u/%02u/%02u %02u:%02u:%02u# profile=%u mode=%s "
+		       "granted=%s (global-pid=%u) task={ pid=%u ppid=%u "
+		       "uid=%u gid=%u euid=%u egid=%u suid=%u sgid=%u "
+		       "fsuid=%u fsgid=%u }", stamp.year, stamp.month,
+		       stamp.day, stamp.hour, stamp.min, stamp.sec, r->profile,
+		       tomoyo_mode[r->mode], tomoyo_yesno(r->granted), gpid,
+		       tomoyo_sys_getpid(), tomoyo_sys_getppid(),
+		       current_uid(), current_gid(), current_euid(),
+		       current_egid(), current_suid(), current_sgid(),
+		       current_fsuid(), current_fsgid());
+	if (!obj)
+		goto no_obj_info;
+	if (!obj->validate_done) {
+		tomoyo_get_attributes(obj);
+		obj->validate_done = true;
+	}
+	for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) {
+		struct tomoyo_mini_stat *stat;
+		unsigned int dev;
+		mode_t mode;
+		if (!obj->stat_valid[i])
+			continue;
+		stat = &obj->stat[i];
+		dev = stat->dev;
+		mode = stat->mode;
+		if (i & 1) {
+			pos += snprintf(buffer + pos,
+					tomoyo_buffer_len - 1 - pos,
+					" path%u.parent={ uid=%u gid=%u "
+					"ino=%lu perm=0%o }", (i >> 1) + 1,
+					stat->uid, stat->gid, (unsigned long)
+					stat->ino, stat->mode & S_IALLUGO);
+			continue;
+		}
+		pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos,
+				" path%u={ uid=%u gid=%u ino=%lu major=%u"
+				" minor=%u perm=0%o type=%s", (i >> 1) + 1,
+				stat->uid, stat->gid, (unsigned long)
+				stat->ino, MAJOR(dev), MINOR(dev),
+				mode & S_IALLUGO, tomoyo_filetype(mode));
+		if (S_ISCHR(mode) || S_ISBLK(mode)) {
+			dev = stat->rdev;
+			pos += snprintf(buffer + pos,
+					tomoyo_buffer_len - 1 - pos,
+					" dev_major=%u dev_minor=%u",
+					MAJOR(dev), MINOR(dev));
+		}
+		pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos,
+				" }");
+	}
+no_obj_info:
+	if (pos < tomoyo_buffer_len - 1)
+		return buffer;
+	kfree(buffer);
+	return NULL;
+}
+
+/**
+ * tomoyo_init_log - Allocate buffer for audit logs.
+ *
+ * @r:    Pointer to "struct tomoyo_request_info".
+ * @len:  Buffer size needed for @fmt and @args.
+ * @fmt:  The printf()'s format string.
+ * @args: va_list structure for @fmt.
+ *
+ * Returns pointer to allocated memory.
+ *
+ * This function uses kzalloc(), so the caller must kfree() the returned
+ * buffer unless this function returned NULL.
+ */
+char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
+		      va_list args)
+{
+	char *buf = NULL;
+	char *bprm_info = NULL;
+	const char *header = NULL;
+	char *realpath = NULL;
+	const char *symlink = NULL;
+	int pos;
+	const char *domainname = r->domain->domainname->name;
+	header = tomoyo_print_header(r);
+	if (!header)
+		return NULL;
+	/* +10 is for '\n' etc. and '\0'. */
+	len += strlen(domainname) + strlen(header) + 10;
+	if (r->ee) {
+		struct file *file = r->ee->bprm->file;
+		realpath = tomoyo_realpath_from_path(&file->f_path);
+		bprm_info = tomoyo_print_bprm(r->ee->bprm, &r->ee->dump);
+		if (!realpath || !bprm_info)
+			goto out;
+		/* +80 is for " exec={ realpath=\"%s\" argc=%d envc=%d %s }" */
+		len += strlen(realpath) + 80 + strlen(bprm_info);
+	} else if (r->obj && r->obj->symlink_target) {
+		symlink = r->obj->symlink_target->name;
+		/* +18 is for " symlink.target=\"%s\"" */
+		len += 18 + strlen(symlink);
+	}
+	len = tomoyo_round2(len);
+	buf = kzalloc(len, GFP_NOFS);
+	if (!buf)
+		goto out;
+	len--;
+	pos = snprintf(buf, len, "%s", header);
+	if (realpath) {
+		struct linux_binprm *bprm = r->ee->bprm;
+		pos += snprintf(buf + pos, len - pos,
+				" exec={ realpath=\"%s\" argc=%d envc=%d %s }",
+				realpath, bprm->argc, bprm->envc, bprm_info);
+	} else if (symlink)
+		pos += snprintf(buf + pos, len - pos, " symlink.target=\"%s\"",
+				symlink);
+	pos += snprintf(buf + pos, len - pos, "\n%s\n", domainname);
+	vsnprintf(buf + pos, len - pos, fmt, args);
+out:
+	kfree(realpath);
+	kfree(bprm_info);
+	kfree(header);
+	return buf;
+}
+
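
tomoyo_round2() is defined elsewhere in this series; its purpose is to
round the requested size up so that the quota accounting done by
tomoyo_write_log2() below matches the size class kzalloc() will actually
hand out. A hypothetical stand-alone sketch of the idea (the real helper
may differ in detail, e.g. by starting from kmalloc()'s smallest bucket):

	/* Sketch only: round a size up to the next power of two. */
	static size_t round2(size_t size)
	{
		size_t t = 1;

		if (!size)
			return 0;
		while (t < size)
			t <<= 1;
		return t;
	}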
+/* Wait queue for /sys/kernel/security/tomoyo/audit. */
+static DECLARE_WAIT_QUEUE_HEAD(tomoyo_log_wait);
+
+/* Structure for audit log. */
+struct tomoyo_log {
+	struct list_head list;
+	char *log;
+	int size;
+};
+
+/* The list for "struct tomoyo_log". */
+static LIST_HEAD(tomoyo_log);
+
+/* Lock for "struct list_head tomoyo_log". */
+static DEFINE_SPINLOCK(tomoyo_log_lock);
+
+/* Length of "struct list_head tomoyo_log". */
+static unsigned int tomoyo_log_count;
+
+/**
+ * tomoyo_get_audit - Get audit mode.
+ *
+ * @ns:          Pointer to "struct tomoyo_policy_namespace".
+ * @profile:     Profile number.
+ * @index:       Index number of functionality.
+ * @is_granted:  True if granted log, false otherwise.
+ *
+ * Returns true if this request should be audited, false otherwise.
+ */
+static bool tomoyo_get_audit(const struct tomoyo_policy_namespace *ns,
+			     const u8 profile, const u8 index,
+			     const bool is_granted)
+{
+	u8 mode;
+	const u8 category = tomoyo_index2category[index] +
+		TOMOYO_MAX_MAC_INDEX;
+	struct tomoyo_profile *p;
+	if (!tomoyo_policy_loaded)
+		return false;
+	p = tomoyo_profile(ns, profile);
+	if (tomoyo_log_count >= p->pref[TOMOYO_PREF_MAX_AUDIT_LOG])
+		return false;
+	mode = p->config[index];
+	if (mode == TOMOYO_CONFIG_USE_DEFAULT)
+		mode = p->config[category];
+	if (mode == TOMOYO_CONFIG_USE_DEFAULT)
+		mode = p->default_config;
+	if (is_granted)
+		return mode & TOMOYO_CONFIG_WANT_GRANT_LOG;
+	return mode & TOMOYO_CONFIG_WANT_REJECT_LOG;
+}
+
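
The three-step fallback above (per-functionality entry, then per-category
entry, then the profile's default) is the same resolution order TOMOYO
uses when deciding the enforcement mode of a request. Factored out as a
hypothetical helper for clarity:

	/* Sketch of the mode resolution performed by tomoyo_get_audit(). */
	static u8 tomoyo_resolve_config(const struct tomoyo_profile *p,
					const u8 index, const u8 category)
	{
		u8 mode = p->config[index];

		if (mode == TOMOYO_CONFIG_USE_DEFAULT)
			mode = p->config[category];
		if (mode == TOMOYO_CONFIG_USE_DEFAULT)
			mode = p->default_config;
		return mode;
	}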
+/**
+ * tomoyo_write_log2 - Write an audit log.
+ *
+ * @r:    Pointer to "struct tomoyo_request_info".
+ * @len:  Buffer size needed for @fmt and @args.
+ * @fmt:  The printf()'s format string.
+ * @args: va_list structure for @fmt.
+ *
+ * Returns nothing.
+ */
+void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt,
+		       va_list args)
+{
+	char *buf;
+	struct tomoyo_log *entry;
+	bool quota_exceeded = false;
+	if (!tomoyo_get_audit(r->domain->ns, r->profile, r->type, r->granted))
+		goto out;
+	buf = tomoyo_init_log(r, len, fmt, args);
+	if (!buf)
+		goto out;
+	entry = kzalloc(sizeof(*entry), GFP_NOFS);
+	if (!entry) {
+		kfree(buf);
+		goto out;
+	}
+	entry->log = buf;
+	len = tomoyo_round2(strlen(buf) + 1);
+	/*
+	 * The entry->size is used for memory quota checks.
+	 * Don't go beyond strlen(entry->log).
+	 */
+	entry->size = len + tomoyo_round2(sizeof(*entry));
+	spin_lock(&tomoyo_log_lock);
+	if (tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT] &&
+	    tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] + entry->size >=
+	    tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT]) {
+		quota_exceeded = true;
+	} else {
+		tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] += entry->size;
+		list_add_tail(&entry->list, &tomoyo_log);
+		tomoyo_log_count++;
+	}
+	spin_unlock(&tomoyo_log_lock);
+	if (quota_exceeded) {
+		kfree(buf);
+		kfree(entry);
+		goto out;
+	}
+	wake_up(&tomoyo_log_wait);
+out:
+	return;
+}
+
+/**
+ * tomoyo_write_log - Write an audit log.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @fmt: The printf()'s format string, followed by parameters.
+ *
+ * Returns nothing.
+ */
+void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	va_start(args, fmt);
+	len = vsnprintf((char *) &len, 1, fmt, args) + 1;
+	va_end(args);
+	va_start(args, fmt);
+	tomoyo_write_log2(r, len, fmt, args);
+	va_end(args);
+}
+
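
The first vsnprintf() call above is only measuring: with a buffer size of
1 it stores nothing but a '\0' (into len, reused as scratch space) and
returns the length the fully formatted string would have needed, so len
ends up as that length plus one byte for the terminator. The conventional
form of the same idiom, as a stand-alone sketch:

	#include <stdarg.h>
	#include <stdio.h>

	/* Return the buffer size needed to format fmt, including '\0'. */
	static int formatted_len(const char *fmt, ...)
	{
		va_list args;
		int len;

		va_start(args, fmt);
		len = vsnprintf(NULL, 0, fmt, args) + 1;
		va_end(args);
		return len;
	}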
+/**
+ * tomoyo_read_log - Read an audit log.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+void tomoyo_read_log(struct tomoyo_io_buffer *head)
+{
+	struct tomoyo_log *ptr = NULL;
+	if (head->r.w_pos)
+		return;
+	kfree(head->read_buf);
+	head->read_buf = NULL;
+	spin_lock(&tomoyo_log_lock);
+	if (!list_empty(&tomoyo_log)) {
+		ptr = list_entry(tomoyo_log.next, typeof(*ptr), list);
+		list_del(&ptr->list);
+		tomoyo_log_count--;
+		tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] -= ptr->size;
+	}
+	spin_unlock(&tomoyo_log_lock);
+	if (ptr) {
+		head->read_buf = ptr->log;
+		head->r.w[head->r.w_pos++] = head->read_buf;
+		kfree(ptr);
+	}
+}
+
+/**
+ * tomoyo_poll_log - Wait for an audit log.
+ *
+ * @file: Pointer to "struct file".
+ * @wait: Pointer to "poll_table".
+ *
+ * Returns POLLIN | POLLRDNORM when ready to read an audit log.
+ */
+int tomoyo_poll_log(struct file *file, poll_table *wait)
+{
+	if (tomoyo_log_count)
+		return POLLIN | POLLRDNORM;
+	poll_wait(file, &tomoyo_log_wait, wait);
+	if (tomoyo_log_count)
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
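
From userspace, entries queued by tomoyo_write_log2() are drained through
/sys/kernel/security/tomoyo/audit, and poll() blocks until
tomoyo_poll_log() above reports readability. A minimal hypothetical
reader (error handling and read semantics simplified):

	#include <fcntl.h>
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[8192];
		struct pollfd pfd;
		ssize_t len;

		pfd.fd = open("/sys/kernel/security/tomoyo/audit", O_RDONLY);
		if (pfd.fd == -1)
			return 1;
		pfd.events = POLLIN;
		while (poll(&pfd, 1, -1) > 0) {
			len = read(pfd.fd, buf, sizeof(buf) - 1);
			if (len <= 0)
				continue;
			buf[len] = '\0';
			fputs(buf, stdout);
		}
		close(pfd.fd);
		return 0;
	}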
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index a0d09e5..c8439cf2 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/common.c
  *
- * Common functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/uaccess.h>
@@ -11,56 +9,133 @@
 #include <linux/security.h>
 #include "common.h"
 
-static struct tomoyo_profile tomoyo_default_profile = {
-	.learning = &tomoyo_default_profile.preference,
-	.permissive = &tomoyo_default_profile.preference,
-	.enforcing = &tomoyo_default_profile.preference,
-	.preference.enforcing_verbose = true,
-	.preference.learning_max_entry = 2048,
-	.preference.learning_verbose = false,
-	.preference.permissive_verbose = true
-};
-
-/* Profile version. Currently only 20090903 is defined. */
-static unsigned int tomoyo_profile_version;
-
-/* Profile table. Memory is allocated as needed. */
-static struct tomoyo_profile *tomoyo_profile_ptr[TOMOYO_MAX_PROFILES];
-
-/* String table for functionality that takes 4 modes. */
-static const char *tomoyo_mode[4] = {
-	"disabled", "learning", "permissive", "enforcing"
+/* String table for operation mode. */
+const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE] = {
+	[TOMOYO_CONFIG_DISABLED]   = "disabled",
+	[TOMOYO_CONFIG_LEARNING]   = "learning",
+	[TOMOYO_CONFIG_PERMISSIVE] = "permissive",
+	[TOMOYO_CONFIG_ENFORCING]  = "enforcing"
 };
 
 /* String table for /sys/kernel/security/tomoyo/profile */
-static const char *tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
 				       + TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
-	[TOMOYO_MAC_FILE_EXECUTE]    = "file::execute",
-	[TOMOYO_MAC_FILE_OPEN]       = "file::open",
-	[TOMOYO_MAC_FILE_CREATE]     = "file::create",
-	[TOMOYO_MAC_FILE_UNLINK]     = "file::unlink",
-	[TOMOYO_MAC_FILE_MKDIR]      = "file::mkdir",
-	[TOMOYO_MAC_FILE_RMDIR]      = "file::rmdir",
-	[TOMOYO_MAC_FILE_MKFIFO]     = "file::mkfifo",
-	[TOMOYO_MAC_FILE_MKSOCK]     = "file::mksock",
-	[TOMOYO_MAC_FILE_TRUNCATE]   = "file::truncate",
-	[TOMOYO_MAC_FILE_SYMLINK]    = "file::symlink",
-	[TOMOYO_MAC_FILE_REWRITE]    = "file::rewrite",
-	[TOMOYO_MAC_FILE_MKBLOCK]    = "file::mkblock",
-	[TOMOYO_MAC_FILE_MKCHAR]     = "file::mkchar",
-	[TOMOYO_MAC_FILE_LINK]       = "file::link",
-	[TOMOYO_MAC_FILE_RENAME]     = "file::rename",
-	[TOMOYO_MAC_FILE_CHMOD]      = "file::chmod",
-	[TOMOYO_MAC_FILE_CHOWN]      = "file::chown",
-	[TOMOYO_MAC_FILE_CHGRP]      = "file::chgrp",
-	[TOMOYO_MAC_FILE_IOCTL]      = "file::ioctl",
-	[TOMOYO_MAC_FILE_CHROOT]     = "file::chroot",
-	[TOMOYO_MAC_FILE_MOUNT]      = "file::mount",
-	[TOMOYO_MAC_FILE_UMOUNT]     = "file::umount",
-	[TOMOYO_MAC_FILE_PIVOT_ROOT] = "file::pivot_root",
+	[TOMOYO_MAC_FILE_EXECUTE]    = "execute",
+	[TOMOYO_MAC_FILE_OPEN]       = "open",
+	[TOMOYO_MAC_FILE_CREATE]     = "create",
+	[TOMOYO_MAC_FILE_UNLINK]     = "unlink",
+	[TOMOYO_MAC_FILE_GETATTR]    = "getattr",
+	[TOMOYO_MAC_FILE_MKDIR]      = "mkdir",
+	[TOMOYO_MAC_FILE_RMDIR]      = "rmdir",
+	[TOMOYO_MAC_FILE_MKFIFO]     = "mkfifo",
+	[TOMOYO_MAC_FILE_MKSOCK]     = "mksock",
+	[TOMOYO_MAC_FILE_TRUNCATE]   = "truncate",
+	[TOMOYO_MAC_FILE_SYMLINK]    = "symlink",
+	[TOMOYO_MAC_FILE_MKBLOCK]    = "mkblock",
+	[TOMOYO_MAC_FILE_MKCHAR]     = "mkchar",
+	[TOMOYO_MAC_FILE_LINK]       = "link",
+	[TOMOYO_MAC_FILE_RENAME]     = "rename",
+	[TOMOYO_MAC_FILE_CHMOD]      = "chmod",
+	[TOMOYO_MAC_FILE_CHOWN]      = "chown",
+	[TOMOYO_MAC_FILE_CHGRP]      = "chgrp",
+	[TOMOYO_MAC_FILE_IOCTL]      = "ioctl",
+	[TOMOYO_MAC_FILE_CHROOT]     = "chroot",
+	[TOMOYO_MAC_FILE_MOUNT]      = "mount",
+	[TOMOYO_MAC_FILE_UMOUNT]     = "unmount",
+	[TOMOYO_MAC_FILE_PIVOT_ROOT] = "pivot_root",
 	[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file",
 };
 
+/* String table for conditions. */
+const char * const tomoyo_condition_keyword[TOMOYO_MAX_CONDITION_KEYWORD] = {
+	[TOMOYO_TASK_UID]             = "task.uid",
+	[TOMOYO_TASK_EUID]            = "task.euid",
+	[TOMOYO_TASK_SUID]            = "task.suid",
+	[TOMOYO_TASK_FSUID]           = "task.fsuid",
+	[TOMOYO_TASK_GID]             = "task.gid",
+	[TOMOYO_TASK_EGID]            = "task.egid",
+	[TOMOYO_TASK_SGID]            = "task.sgid",
+	[TOMOYO_TASK_FSGID]           = "task.fsgid",
+	[TOMOYO_TASK_PID]             = "task.pid",
+	[TOMOYO_TASK_PPID]            = "task.ppid",
+	[TOMOYO_EXEC_ARGC]            = "exec.argc",
+	[TOMOYO_EXEC_ENVC]            = "exec.envc",
+	[TOMOYO_TYPE_IS_SOCKET]       = "socket",
+	[TOMOYO_TYPE_IS_SYMLINK]      = "symlink",
+	[TOMOYO_TYPE_IS_FILE]         = "file",
+	[TOMOYO_TYPE_IS_BLOCK_DEV]    = "block",
+	[TOMOYO_TYPE_IS_DIRECTORY]    = "directory",
+	[TOMOYO_TYPE_IS_CHAR_DEV]     = "char",
+	[TOMOYO_TYPE_IS_FIFO]         = "fifo",
+	[TOMOYO_MODE_SETUID]          = "setuid",
+	[TOMOYO_MODE_SETGID]          = "setgid",
+	[TOMOYO_MODE_STICKY]          = "sticky",
+	[TOMOYO_MODE_OWNER_READ]      = "owner_read",
+	[TOMOYO_MODE_OWNER_WRITE]     = "owner_write",
+	[TOMOYO_MODE_OWNER_EXECUTE]   = "owner_execute",
+	[TOMOYO_MODE_GROUP_READ]      = "group_read",
+	[TOMOYO_MODE_GROUP_WRITE]     = "group_write",
+	[TOMOYO_MODE_GROUP_EXECUTE]   = "group_execute",
+	[TOMOYO_MODE_OTHERS_READ]     = "others_read",
+	[TOMOYO_MODE_OTHERS_WRITE]    = "others_write",
+	[TOMOYO_MODE_OTHERS_EXECUTE]  = "others_execute",
+	[TOMOYO_EXEC_REALPATH]        = "exec.realpath",
+	[TOMOYO_SYMLINK_TARGET]       = "symlink.target",
+	[TOMOYO_PATH1_UID]            = "path1.uid",
+	[TOMOYO_PATH1_GID]            = "path1.gid",
+	[TOMOYO_PATH1_INO]            = "path1.ino",
+	[TOMOYO_PATH1_MAJOR]          = "path1.major",
+	[TOMOYO_PATH1_MINOR]          = "path1.minor",
+	[TOMOYO_PATH1_PERM]           = "path1.perm",
+	[TOMOYO_PATH1_TYPE]           = "path1.type",
+	[TOMOYO_PATH1_DEV_MAJOR]      = "path1.dev_major",
+	[TOMOYO_PATH1_DEV_MINOR]      = "path1.dev_minor",
+	[TOMOYO_PATH2_UID]            = "path2.uid",
+	[TOMOYO_PATH2_GID]            = "path2.gid",
+	[TOMOYO_PATH2_INO]            = "path2.ino",
+	[TOMOYO_PATH2_MAJOR]          = "path2.major",
+	[TOMOYO_PATH2_MINOR]          = "path2.minor",
+	[TOMOYO_PATH2_PERM]           = "path2.perm",
+	[TOMOYO_PATH2_TYPE]           = "path2.type",
+	[TOMOYO_PATH2_DEV_MAJOR]      = "path2.dev_major",
+	[TOMOYO_PATH2_DEV_MINOR]      = "path2.dev_minor",
+	[TOMOYO_PATH1_PARENT_UID]     = "path1.parent.uid",
+	[TOMOYO_PATH1_PARENT_GID]     = "path1.parent.gid",
+	[TOMOYO_PATH1_PARENT_INO]     = "path1.parent.ino",
+	[TOMOYO_PATH1_PARENT_PERM]    = "path1.parent.perm",
+	[TOMOYO_PATH2_PARENT_UID]     = "path2.parent.uid",
+	[TOMOYO_PATH2_PARENT_GID]     = "path2.parent.gid",
+	[TOMOYO_PATH2_PARENT_INO]     = "path2.parent.ino",
+	[TOMOYO_PATH2_PARENT_PERM]    = "path2.parent.perm",
+};
+
+/* String table for PREFERENCE keyword. */
+static const char * const tomoyo_pref_keywords[TOMOYO_MAX_PREF] = {
+	[TOMOYO_PREF_MAX_AUDIT_LOG]      = "max_audit_log",
+	[TOMOYO_PREF_MAX_LEARNING_ENTRY] = "max_learning_entry",
+};
+
+/* String table for path operation. */
+const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = {
+	[TOMOYO_TYPE_EXECUTE]    = "execute",
+	[TOMOYO_TYPE_READ]       = "read",
+	[TOMOYO_TYPE_WRITE]      = "write",
+	[TOMOYO_TYPE_APPEND]     = "append",
+	[TOMOYO_TYPE_UNLINK]     = "unlink",
+	[TOMOYO_TYPE_GETATTR]    = "getattr",
+	[TOMOYO_TYPE_RMDIR]      = "rmdir",
+	[TOMOYO_TYPE_TRUNCATE]   = "truncate",
+	[TOMOYO_TYPE_SYMLINK]    = "symlink",
+	[TOMOYO_TYPE_CHROOT]     = "chroot",
+	[TOMOYO_TYPE_UMOUNT]     = "unmount",
+};
+
+/* String table for categories. */
+static const char * const tomoyo_category_keywords
+[TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
+	[TOMOYO_MAC_CATEGORY_FILE]       = "file",
+};
+
 /* Permit policy management by non-root user? */
 static bool tomoyo_manage_by_non_root;
 
@@ -71,11 +146,20 @@
  *
  * @value: Bool value.
  */
-static const char *tomoyo_yesno(const unsigned int value)
+const char *tomoyo_yesno(const unsigned int value)
 {
 	return value ? "yes" : "no";
 }
 
+/**
+ * tomoyo_addprintf - strncat()-like snprintf().
+ *
+ * @buffer: Buffer to write to. Must be '\0'-terminated.
+ * @len:    Size of @buffer.
+ * @fmt:    The printf()'s format string, followed by parameters.
+ *
+ * Returns nothing.
+ */
 static void tomoyo_addprintf(char *buffer, int len, const char *fmt, ...)
 {
 	va_list args;
@@ -96,7 +180,7 @@
 {
 	while (head->r.w_pos) {
 		const char *w = head->r.w[0];
-		int len = strlen(w);
+		size_t len = strlen(w);
 		if (len) {
 			if (len > head->read_user_buf_avail)
 				len = head->read_user_buf_avail;
@@ -111,7 +195,7 @@
 		head->r.w[0] = w;
 		if (*w)
 			return false;
-		/* Add '\0' for query. */
+		/* Add '\0' for audit logs and query. */
 		if (head->poll) {
 			if (!head->read_user_buf_avail ||
 			    copy_to_user(head->read_user_buf, "", 1))
@@ -155,8 +239,8 @@
 void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
 {
 	va_list args;
-	int len;
-	int pos = head->r.avail;
+	size_t len;
+	size_t pos = head->r.avail;
 	int size = head->readbuf_size - pos;
 	if (size <= 0)
 		return;
@@ -171,11 +255,25 @@
 	tomoyo_set_string(head, head->read_buf + pos);
 }
 
+/**
+ * tomoyo_set_space - Put a space to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_set_space(struct tomoyo_io_buffer *head)
 {
 	tomoyo_set_string(head, " ");
 }
 
+/**
+ * tomoyo_set_lf - Put a line feed to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns true on success, false otherwise.
+ */
 static bool tomoyo_set_lf(struct tomoyo_io_buffer *head)
 {
 	tomoyo_set_string(head, "\n");
@@ -183,6 +281,62 @@
 }
 
 /**
+ * tomoyo_set_slash - Put a slash to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_set_slash(struct tomoyo_io_buffer *head)
+{
+	tomoyo_set_string(head, "/");
+}
+
+/* List of namespaces. */
+LIST_HEAD(tomoyo_namespace_list);
+/* True if namespace other than tomoyo_kernel_namespace is defined. */
+static bool tomoyo_namespace_enabled;
+
+/**
+ * tomoyo_init_policy_namespace - Initialize namespace.
+ *
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
+ *
+ * Returns nothing.
+ */
+void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns)
+{
+	unsigned int idx;
+	for (idx = 0; idx < TOMOYO_MAX_ACL_GROUPS; idx++)
+		INIT_LIST_HEAD(&ns->acl_group[idx]);
+	for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++)
+		INIT_LIST_HEAD(&ns->group_list[idx]);
+	for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++)
+		INIT_LIST_HEAD(&ns->policy_list[idx]);
+	ns->profile_version = 20100903;
+	tomoyo_namespace_enabled = !list_empty(&tomoyo_namespace_list);
+	list_add_tail_rcu(&ns->namespace_list, &tomoyo_namespace_list);
+}
+
+/**
+ * tomoyo_print_namespace - Print namespace header.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_namespace(struct tomoyo_io_buffer *head)
+{
+	if (!tomoyo_namespace_enabled)
+		return;
+	tomoyo_set_string(head,
+			  container_of(head->r.ns,
+				       struct tomoyo_policy_namespace,
+				       namespace_list)->name);
+	tomoyo_set_space(head);
+}
+
+/**
  * tomoyo_print_name_union - Print a tomoyo_name_union.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
@@ -192,7 +346,7 @@
 				    const struct tomoyo_name_union *ptr)
 {
 	tomoyo_set_space(head);
-	if (ptr->is_group) {
+	if (ptr->group) {
 		tomoyo_set_string(head, "@");
 		tomoyo_set_string(head, ptr->group->group_name->name);
 	} else {
@@ -201,24 +355,46 @@
 }
 
 /**
- * tomoyo_print_number_union - Print a tomoyo_number_union.
+ * tomoyo_print_name_union_quoted - Print a tomoyo_name_union with a quote.
  *
- * @head:       Pointer to "struct tomoyo_io_buffer".
- * @ptr:        Pointer to "struct tomoyo_number_union".
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr:  Pointer to "struct tomoyo_name_union".
+ *
+ * Returns nothing.
  */
-static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
-				      const struct tomoyo_number_union *ptr)
+static void tomoyo_print_name_union_quoted(struct tomoyo_io_buffer *head,
+					   const struct tomoyo_name_union *ptr)
 {
-	tomoyo_set_space(head);
-	if (ptr->is_group) {
+	if (ptr->group) {
+		tomoyo_set_string(head, "@");
+		tomoyo_set_string(head, ptr->group->group_name->name);
+	} else {
+		tomoyo_set_string(head, "\"");
+		tomoyo_set_string(head, ptr->filename->name);
+		tomoyo_set_string(head, "\"");
+	}
+}
+
+/**
+ * tomoyo_print_number_union_nospace - Print a tomoyo_number_union without a space.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr:  Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_number_union_nospace
+(struct tomoyo_io_buffer *head, const struct tomoyo_number_union *ptr)
+{
+	if (ptr->group) {
 		tomoyo_set_string(head, "@");
 		tomoyo_set_string(head, ptr->group->group_name->name);
 	} else {
 		int i;
 		unsigned long min = ptr->values[0];
 		const unsigned long max = ptr->values[1];
-		u8 min_type = ptr->min_type;
-		const u8 max_type = ptr->max_type;
+		u8 min_type = ptr->value_type[0];
+		const u8 max_type = ptr->value_type[1];
 		char buffer[128];
 		buffer[0] = '\0';
 		for (i = 0; i < 2; i++) {
@@ -232,8 +408,8 @@
 						 "0%lo", min);
 				break;
 			default:
-				tomoyo_addprintf(buffer, sizeof(buffer),
-						 "%lu", min);
+				tomoyo_addprintf(buffer, sizeof(buffer), "%lu",
+						 min);
 				break;
 			}
 			if (min == max && min_type == max_type)
@@ -247,35 +423,53 @@
 }
 
 /**
+ * tomoyo_print_number_union - Print a tomoyo_number_union.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr:  Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
+				      const struct tomoyo_number_union *ptr)
+{
+	tomoyo_set_space(head);
+	tomoyo_print_number_union_nospace(head, ptr);
+}
+
+/**
  * tomoyo_assign_profile - Create a new profile.
  *
+ * @ns:      Pointer to "struct tomoyo_policy_namespace".
  * @profile: Profile number to create.
  *
  * Returns pointer to "struct tomoyo_profile" on success, NULL otherwise.
  */
-static struct tomoyo_profile *tomoyo_assign_profile(const unsigned int profile)
+static struct tomoyo_profile *tomoyo_assign_profile
+(struct tomoyo_policy_namespace *ns, const unsigned int profile)
 {
 	struct tomoyo_profile *ptr;
 	struct tomoyo_profile *entry;
 	if (profile >= TOMOYO_MAX_PROFILES)
 		return NULL;
-	ptr = tomoyo_profile_ptr[profile];
+	ptr = ns->profile_ptr[profile];
 	if (ptr)
 		return ptr;
 	entry = kzalloc(sizeof(*entry), GFP_NOFS);
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		goto out;
-	ptr = tomoyo_profile_ptr[profile];
+	ptr = ns->profile_ptr[profile];
 	if (!ptr && tomoyo_memory_ok(entry)) {
 		ptr = entry;
-		ptr->learning = &tomoyo_default_profile.preference;
-		ptr->permissive = &tomoyo_default_profile.preference;
-		ptr->enforcing = &tomoyo_default_profile.preference;
-		ptr->default_config = TOMOYO_CONFIG_DISABLED;
+		ptr->default_config = TOMOYO_CONFIG_DISABLED |
+			TOMOYO_CONFIG_WANT_GRANT_LOG |
+			TOMOYO_CONFIG_WANT_REJECT_LOG;
 		memset(ptr->config, TOMOYO_CONFIG_USE_DEFAULT,
 		       sizeof(ptr->config));
+		ptr->pref[TOMOYO_PREF_MAX_AUDIT_LOG] = 1024;
+		ptr->pref[TOMOYO_PREF_MAX_LEARNING_ENTRY] = 2048;
 		mb(); /* Avoid out-of-order execution. */
-		tomoyo_profile_ptr[profile] = ptr;
+		ns->profile_ptr[profile] = ptr;
 		entry = NULL;
 	}
 	mutex_unlock(&tomoyo_policy_lock);
@@ -287,19 +481,29 @@
 /**
  * tomoyo_profile - Find a profile.
  *
+ * @ns:      Pointer to "struct tomoyo_policy_namespace".
  * @profile: Profile number to find.
  *
  * Returns pointer to "struct tomoyo_profile".
  */
-struct tomoyo_profile *tomoyo_profile(const u8 profile)
+struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns,
+				      const u8 profile)
 {
-	struct tomoyo_profile *ptr = tomoyo_profile_ptr[profile];
-	if (!tomoyo_policy_loaded)
-		return &tomoyo_default_profile;
-	BUG_ON(!ptr);
+	static struct tomoyo_profile tomoyo_null_profile;
+	struct tomoyo_profile *ptr = ns->profile_ptr[profile];
+	if (!ptr)
+		ptr = &tomoyo_null_profile;
 	return ptr;
 }
 
+/**
+ * tomoyo_find_yesno - Find values for specified keyword.
+ *
+ * @string: String to check.
+ * @find:   Name of keyword.
+ *
+ * Returns 1 if "@find=yes" was found, 0 if "@find=no" was found, -1 otherwise.
+ */
 static s8 tomoyo_find_yesno(const char *string, const char *find)
 {
 	const char *cp = strstr(string, find);
@@ -313,18 +517,15 @@
 	return -1;
 }
 
-static void tomoyo_set_bool(bool *b, const char *string, const char *find)
-{
-	switch (tomoyo_find_yesno(string, find)) {
-	case 1:
-		*b = true;
-		break;
-	case 0:
-		*b = false;
-		break;
-	}
-}
-
+/**
+ * tomoyo_set_uint - Set value for specified preference.
+ *
+ * @i:      Pointer to "unsigned int".
+ * @string: String to check.
+ * @find:   Name of keyword.
+ *
+ * Returns nothing.
+ */
 static void tomoyo_set_uint(unsigned int *i, const char *string,
 			    const char *find)
 {
@@ -333,51 +534,16 @@
 		sscanf(cp + strlen(find), "=%u", i);
 }
 
-static void tomoyo_set_pref(const char *name, const char *value,
-			    const bool use_default,
-			    struct tomoyo_profile *profile)
-{
-	struct tomoyo_preference **pref;
-	bool *verbose;
-	if (!strcmp(name, "enforcing")) {
-		if (use_default) {
-			pref = &profile->enforcing;
-			goto set_default;
-		}
-		profile->enforcing = &profile->preference;
-		verbose = &profile->preference.enforcing_verbose;
-		goto set_verbose;
-	}
-	if (!strcmp(name, "permissive")) {
-		if (use_default) {
-			pref = &profile->permissive;
-			goto set_default;
-		}
-		profile->permissive = &profile->preference;
-		verbose = &profile->preference.permissive_verbose;
-		goto set_verbose;
-	}
-	if (!strcmp(name, "learning")) {
-		if (use_default) {
-			pref = &profile->learning;
-			goto set_default;
-		}
-		profile->learning = &profile->preference;
-		tomoyo_set_uint(&profile->preference.learning_max_entry, value,
-			     "max_entry");
-		verbose = &profile->preference.learning_verbose;
-		goto set_verbose;
-	}
-	return;
- set_default:
-	*pref = &tomoyo_default_profile.preference;
-	return;
- set_verbose:
-	tomoyo_set_bool(verbose, value, "verbose");
-}
-
+/**
+ * tomoyo_set_mode - Set mode for specified profile.
+ *
+ * @name:    Name of functionality.
+ * @value:   Mode for @name.
+ * @profile: Pointer to "struct tomoyo_profile".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_set_mode(char *name, const char *value,
-			   const bool use_default,
 			   struct tomoyo_profile *profile)
 {
 	u8 i;
@@ -389,7 +555,17 @@
 		config = 0;
 		for (i = 0; i < TOMOYO_MAX_MAC_INDEX
 			     + TOMOYO_MAX_MAC_CATEGORY_INDEX; i++) {
-			if (strcmp(name, tomoyo_mac_keywords[i]))
+			int len = 0;
+			if (i < TOMOYO_MAX_MAC_INDEX) {
+				const u8 c = tomoyo_index2category[i];
+				const char *category =
+					tomoyo_category_keywords[c];
+				len = strlen(category);
+				if (strncmp(name, category, len) ||
+				    name[len++] != ':' || name[len++] != ':')
+					continue;
+			}
+			if (strcmp(name + len, tomoyo_mac_keywords[i]))
 				continue;
 			config = profile->config[i];
 			break;
@@ -399,7 +575,7 @@
 	} else {
 		return -EINVAL;
 	}
-	if (use_default) {
+	if (strstr(value, "use_default")) {
 		config = TOMOYO_CONFIG_USE_DEFAULT;
 	} else {
 		u8 mode;
@@ -410,6 +586,24 @@
 				 * 'config' from 'TOMOYO_CONFIG_USE_DEFAULT'.
 				 */
 				config = (config & ~7) | mode;
+		if (config != TOMOYO_CONFIG_USE_DEFAULT) {
+			switch (tomoyo_find_yesno(value, "grant_log")) {
+			case 1:
+				config |= TOMOYO_CONFIG_WANT_GRANT_LOG;
+				break;
+			case 0:
+				config &= ~TOMOYO_CONFIG_WANT_GRANT_LOG;
+				break;
+			}
+			switch (tomoyo_find_yesno(value, "reject_log")) {
+			case 1:
+				config |= TOMOYO_CONFIG_WANT_REJECT_LOG;
+				break;
+			case 0:
+				config &= ~TOMOYO_CONFIG_WANT_REJECT_LOG;
+				break;
+			}
+		}
 	}
 	if (i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX)
 		profile->config[i] = config;
@@ -429,34 +623,22 @@
 {
 	char *data = head->write_buf;
 	unsigned int i;
-	bool use_default = false;
 	char *cp;
 	struct tomoyo_profile *profile;
-	if (sscanf(data, "PROFILE_VERSION=%u", &tomoyo_profile_version) == 1)
+	if (sscanf(data, "PROFILE_VERSION=%u", &head->w.ns->profile_version)
+	    == 1)
 		return 0;
 	i = simple_strtoul(data, &cp, 10);
-	if (data == cp) {
-		profile = &tomoyo_default_profile;
-	} else {
-		if (*cp != '-')
-			return -EINVAL;
-		data = cp + 1;
-		profile = tomoyo_assign_profile(i);
-		if (!profile)
-			return -EINVAL;
-	}
+	if (*cp != '-')
+		return -EINVAL;
+	data = cp + 1;
+	profile = tomoyo_assign_profile(head->w.ns, i);
+	if (!profile)
+		return -EINVAL;
 	cp = strchr(data, '=');
 	if (!cp)
 		return -EINVAL;
 	*cp++ = '\0';
-	if (profile != &tomoyo_default_profile)
-		use_default = strstr(cp, "use_default") != NULL;
-	if (tomoyo_str_starts(&data, "PREFERENCE::")) {
-		tomoyo_set_pref(data, cp, use_default, profile);
-		return 0;
-	}
-	if (profile == &tomoyo_default_profile)
-		return -EINVAL;
 	if (!strcmp(data, "COMMENT")) {
 		static DEFINE_SPINLOCK(lock);
 		const struct tomoyo_path_info *new_comment
@@ -471,77 +653,62 @@
 		tomoyo_put_name(old_comment);
 		return 0;
 	}
-	return tomoyo_set_mode(data, cp, use_default, profile);
+	if (!strcmp(data, "PREFERENCE")) {
+		for (i = 0; i < TOMOYO_MAX_PREF; i++)
+			tomoyo_set_uint(&profile->pref[i], cp,
+					tomoyo_pref_keywords[i]);
+		return 0;
+	}
+	return tomoyo_set_mode(data, cp, profile);
 }
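
Putting the parser above together with tomoyo_read_profile() below, the
lines exchanged through /sys/kernel/security/tomoyo/profile now look like
this (profile number, comment and modes are illustrative):

	PROFILE_VERSION=20100903
	1-COMMENT=Profile for testing
	1-PREFERENCE={ max_audit_log=1024 max_learning_entry=2048 }
	1-CONFIG={ mode=learning grant_log=no reject_log=yes }
	1-CONFIG::file::execute={ mode=enforcing grant_log=yes reject_log=yes }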
 
-static void tomoyo_print_preference(struct tomoyo_io_buffer *head,
-				    const int idx)
-{
-	struct tomoyo_preference *pref = &tomoyo_default_profile.preference;
-	const struct tomoyo_profile *profile = idx >= 0 ?
-		tomoyo_profile_ptr[idx] : NULL;
-	char buffer[16] = "";
-	if (profile) {
-		buffer[sizeof(buffer) - 1] = '\0';
-		snprintf(buffer, sizeof(buffer) - 1, "%u-", idx);
-	}
-	if (profile) {
-		pref = profile->learning;
-		if (pref == &tomoyo_default_profile.preference)
-			goto skip1;
-	}
-	tomoyo_io_printf(head, "%sPREFERENCE::%s={ "
-			 "verbose=%s max_entry=%u }\n",
-			 buffer, "learning",
-			 tomoyo_yesno(pref->learning_verbose),
-			 pref->learning_max_entry);
- skip1:
-	if (profile) {
-		pref = profile->permissive;
-		if (pref == &tomoyo_default_profile.preference)
-			goto skip2;
-	}
-	tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n",
-			 buffer, "permissive",
-			 tomoyo_yesno(pref->permissive_verbose));
- skip2:
-	if (profile) {
-		pref = profile->enforcing;
-		if (pref == &tomoyo_default_profile.preference)
-			return;
-	}
-	tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n",
-			 buffer, "enforcing",
-			 tomoyo_yesno(pref->enforcing_verbose));
-}
-
+/**
+ * tomoyo_print_config - Print mode for specified functionality.
+ *
+ * @head:   Pointer to "struct tomoyo_io_buffer".
+ * @config: Mode for that functionality.
+ *
+ * Returns nothing.
+ *
+ * Caller prints functionality's name.
+ */
 static void tomoyo_print_config(struct tomoyo_io_buffer *head, const u8 config)
 {
-	tomoyo_io_printf(head, "={ mode=%s }\n", tomoyo_mode[config & 3]);
+	tomoyo_io_printf(head, "={ mode=%s grant_log=%s reject_log=%s }\n",
+			 tomoyo_mode[config & 3],
+			 tomoyo_yesno(config & TOMOYO_CONFIG_WANT_GRANT_LOG),
+			 tomoyo_yesno(config & TOMOYO_CONFIG_WANT_REJECT_LOG));
 }
 
 /**
  * tomoyo_read_profile - Read profile table.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
  */
 static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
 {
 	u8 index;
+	struct tomoyo_policy_namespace *ns =
+		container_of(head->r.ns, typeof(*ns), namespace_list);
 	const struct tomoyo_profile *profile;
+	if (head->r.eof)
+		return;
  next:
 	index = head->r.index;
-	profile = tomoyo_profile_ptr[index];
+	profile = ns->profile_ptr[index];
 	switch (head->r.step) {
 	case 0:
-		tomoyo_io_printf(head, "PROFILE_VERSION=%s\n", "20090903");
-		tomoyo_print_preference(head, -1);
+		tomoyo_print_namespace(head);
+		tomoyo_io_printf(head, "PROFILE_VERSION=%u\n",
+				 ns->profile_version);
 		head->r.step++;
 		break;
 	case 1:
 		for ( ; head->r.index < TOMOYO_MAX_PROFILES;
 		      head->r.index++)
-			if (tomoyo_profile_ptr[head->r.index])
+			if (ns->profile_ptr[head->r.index])
 				break;
 		if (head->r.index == TOMOYO_MAX_PROFILES)
 			return;
@@ -549,16 +716,25 @@
 		break;
 	case 2:
 		{
+			u8 i;
 			const struct tomoyo_path_info *comment =
 				profile->comment;
+			tomoyo_print_namespace(head);
 			tomoyo_io_printf(head, "%u-COMMENT=", index);
 			tomoyo_set_string(head, comment ? comment->name : "");
 			tomoyo_set_lf(head);
+			tomoyo_io_printf(head, "%u-PREFERENCE={ ", index);
+			for (i = 0; i < TOMOYO_MAX_PREF; i++)
+				tomoyo_io_printf(head, "%s=%u ",
+						 tomoyo_pref_keywords[i],
+						 profile->pref[i]);
+			tomoyo_set_string(head, "}\n");
 			head->r.step++;
 		}
 		break;
 	case 3:
 		{
+			tomoyo_print_namespace(head);
 			tomoyo_io_printf(head, "%u-%s", index, "CONFIG");
 			tomoyo_print_config(head, profile->default_config);
 			head->r.bit = 0;
@@ -572,15 +748,22 @@
 			const u8 config = profile->config[i];
 			if (config == TOMOYO_CONFIG_USE_DEFAULT)
 				continue;
-			tomoyo_io_printf(head, "%u-%s%s", index, "CONFIG::",
-					 tomoyo_mac_keywords[i]);
+			tomoyo_print_namespace(head);
+			if (i < TOMOYO_MAX_MAC_INDEX)
+				tomoyo_io_printf(head, "%u-CONFIG::%s::%s",
+						 index,
+						 tomoyo_category_keywords
+						 [tomoyo_index2category[i]],
+						 tomoyo_mac_keywords[i]);
+			else
+				tomoyo_io_printf(head, "%u-CONFIG::%s", index,
+						 tomoyo_mac_keywords[i]);
 			tomoyo_print_config(head, config);
 			head->r.bit++;
 			break;
 		}
 		if (head->r.bit == TOMOYO_MAX_MAC_INDEX
 		    + TOMOYO_MAX_MAC_CATEGORY_INDEX) {
-			tomoyo_print_preference(head, index);
 			head->r.index++;
 			head->r.step = 1;
 		}
@@ -590,6 +773,14 @@
 		goto next;
 }
 
+/**
+ * tomoyo_same_manager - Check for duplicated "struct tomoyo_manager" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_manager(const struct tomoyo_acl_head *a,
 				const struct tomoyo_acl_head *b)
 {
@@ -611,8 +802,13 @@
 				       const bool is_delete)
 {
 	struct tomoyo_manager e = { };
-	int error;
-
+	struct tomoyo_acl_param param = {
+		/* .ns = &tomoyo_kernel_namespace, */
+		.is_delete = is_delete,
+		.list = &tomoyo_kernel_namespace.
+		policy_list[TOMOYO_ID_MANAGER],
+	};
+	int error = is_delete ? -ENOENT : -ENOMEM;
 	if (tomoyo_domain_def(manager)) {
 		if (!tomoyo_correct_domain(manager))
 			return -EINVAL;
@@ -622,12 +818,11 @@
 			return -EINVAL;
 	}
 	e.manager = tomoyo_get_name(manager);
-	if (!e.manager)
-		return -ENOMEM;
-	error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-				     &tomoyo_policy_list[TOMOYO_ID_MANAGER],
-				     tomoyo_same_manager);
-	tomoyo_put_name(e.manager);
+	if (e.manager) {
+		error = tomoyo_update_policy(&e.head, sizeof(e), &param,
+					     tomoyo_same_manager);
+		tomoyo_put_name(e.manager);
+	}
 	return error;
 }
 
@@ -643,13 +838,12 @@
 static int tomoyo_write_manager(struct tomoyo_io_buffer *head)
 {
 	char *data = head->write_buf;
-	bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE);
 
 	if (!strcmp(data, "manage_by_non_root")) {
-		tomoyo_manage_by_non_root = !is_delete;
+		tomoyo_manage_by_non_root = !head->w.is_delete;
 		return 0;
 	}
-	return tomoyo_update_manager_entry(data, is_delete);
+	return tomoyo_update_manager_entry(data, head->w.is_delete);
 }
 
 /**
@@ -663,8 +857,8 @@
 {
 	if (head->r.eof)
 		return;
-	list_for_each_cookie(head->r.acl,
-			     &tomoyo_policy_list[TOMOYO_ID_MANAGER]) {
+	list_for_each_cookie(head->r.acl, &tomoyo_kernel_namespace.
+			     policy_list[TOMOYO_ID_MANAGER]) {
 		struct tomoyo_manager *ptr =
 			list_entry(head->r.acl, typeof(*ptr), head.list);
 		if (ptr->head.is_deleted)
@@ -697,8 +891,8 @@
 		return true;
 	if (!tomoyo_manage_by_non_root && (task->cred->uid || task->cred->euid))
 		return false;
-	list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER],
-				head.list) {
+	list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.
+				policy_list[TOMOYO_ID_MANAGER], head.list) {
 		if (!ptr->head.is_deleted && ptr->is_domain
 		    && !tomoyo_pathcmp(domainname, ptr->manager)) {
 			found = true;
@@ -710,8 +904,8 @@
 	exe = tomoyo_get_exe();
 	if (!exe)
 		return false;
-	list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER],
-				head.list) {
+	list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.
+				policy_list[TOMOYO_ID_MANAGER], head.list) {
 		if (!ptr->head.is_deleted && !ptr->is_domain
 		    && !strcmp(exe, ptr->manager->name)) {
 			found = true;
@@ -732,7 +926,7 @@
 }
 
 /**
- * tomoyo_select_one - Parse select command.
+ * tomoyo_select_domain - Parse select command.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
  * @data: String to parse.
@@ -741,16 +935,15 @@
  *
  * Caller holds tomoyo_read_lock().
  */
-static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data)
+static bool tomoyo_select_domain(struct tomoyo_io_buffer *head,
+				 const char *data)
 {
 	unsigned int pid;
 	struct tomoyo_domain_info *domain = NULL;
 	bool global_pid = false;
-
-	if (!strcmp(data, "allow_execute")) {
-		head->r.print_execute_only = true;
-		return true;
-	}
+	if (strncmp(data, "select ", 7))
+		return false;
+	data += 7;
 	if (sscanf(data, "pid=%u", &pid) == 1 ||
 	    (global_pid = true, sscanf(data, "global-pid=%u", &pid) == 1)) {
 		struct task_struct *p;
@@ -769,7 +962,7 @@
 			domain = tomoyo_find_domain(data + 7);
 	} else
 		return false;
-	head->write_var1 = domain;
+	head->w.domain = domain;
 	/* Accessing read_buf is safe because head->io_sem is held. */
 	if (!head->read_buf)
 		return true; /* Do nothing if open(O_WRONLY). */
@@ -821,20 +1014,47 @@
 /**
  * tomoyo_write_domain2 - Write domain policy.
  *
- * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ns:        Pointer to "struct tomoyo_policy_namespace".
+ * @list:      Pointer to "struct list_head".
+ * @data:      Policy to be interpreted.
+ * @is_delete: True if it is a delete request.
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_write_domain2(char *data, struct tomoyo_domain_info *domain,
+static int tomoyo_write_domain2(struct tomoyo_policy_namespace *ns,
+				struct list_head *list, char *data,
 				const bool is_delete)
 {
-	if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_ALLOW_MOUNT))
-		return tomoyo_write_mount(data, domain, is_delete);
-	return tomoyo_write_file(data, domain, is_delete);
+	struct tomoyo_acl_param param = {
+		.ns = ns,
+		.list = list,
+		.data = data,
+		.is_delete = is_delete,
+	};
+	static const struct {
+		const char *keyword;
+		int (*write) (struct tomoyo_acl_param *);
+	} tomoyo_callback[1] = {
+		{ "file ", tomoyo_write_file },
+	};
+	u8 i;
+	for (i = 0; i < 1; i++) {
+		if (!tomoyo_str_starts(&param.data,
+				       tomoyo_callback[i].keyword))
+			continue;
+		return tomoyo_callback[i].write(&param);
+	}
+	return -EINVAL;
 }
 
+/* String table for domain flags. */
+const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS] = {
+	[TOMOYO_DIF_QUOTA_WARNED]      = "quota_exceeded\n",
+	[TOMOYO_DIF_TRANSITION_FAILED] = "transition_failed\n",
+};
+
 /**
  * tomoyo_write_domain - Write domain policy.
  *
@@ -847,69 +1067,198 @@
 static int tomoyo_write_domain(struct tomoyo_io_buffer *head)
 {
 	char *data = head->write_buf;
-	struct tomoyo_domain_info *domain = head->write_var1;
-	bool is_delete = false;
-	bool is_select = false;
+	struct tomoyo_policy_namespace *ns;
+	struct tomoyo_domain_info *domain = head->w.domain;
+	const bool is_delete = head->w.is_delete;
+	bool is_select = !is_delete && tomoyo_str_starts(&data, "select ");
 	unsigned int profile;
-
-	if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE))
-		is_delete = true;
-	else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_SELECT))
-		is_select = true;
-	if (is_select && tomoyo_select_one(head, data))
-		return 0;
-	/* Don't allow updating policies by non manager programs. */
-	if (!tomoyo_manager())
-		return -EPERM;
-	if (tomoyo_domain_def(data)) {
+	if (*data == '<') {
 		domain = NULL;
 		if (is_delete)
 			tomoyo_delete_domain(data);
 		else if (is_select)
 			domain = tomoyo_find_domain(data);
 		else
-			domain = tomoyo_assign_domain(data, 0);
-		head->write_var1 = domain;
+			domain = tomoyo_assign_domain(data, false);
+		head->w.domain = domain;
 		return 0;
 	}
 	if (!domain)
 		return -EINVAL;
-
-	if (sscanf(data, TOMOYO_KEYWORD_USE_PROFILE "%u", &profile) == 1
+	ns = domain->ns;
+	if (sscanf(data, "use_profile %u", &profile) == 1
 	    && profile < TOMOYO_MAX_PROFILES) {
-		if (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded)
+		if (!tomoyo_policy_loaded || ns->profile_ptr[profile])
 			domain->profile = (u8) profile;
 		return 0;
 	}
-	if (!strcmp(data, TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ)) {
-		domain->ignore_global_allow_read = !is_delete;
+	if (sscanf(data, "use_group %u\n", &profile) == 1
+	    && profile < TOMOYO_MAX_ACL_GROUPS) {
+		if (!is_delete)
+			domain->group = (u8) profile;
 		return 0;
 	}
-	if (!strcmp(data, TOMOYO_KEYWORD_QUOTA_EXCEEDED)) {
-		domain->quota_warned = !is_delete;
+	for (profile = 0; profile < TOMOYO_MAX_DOMAIN_INFO_FLAGS; profile++) {
+		const char *cp = tomoyo_dif[profile];
+		if (strncmp(data, cp, strlen(cp) - 1))
+			continue;
+		domain->flags[profile] = !is_delete;
 		return 0;
 	}
-	if (!strcmp(data, TOMOYO_KEYWORD_TRANSITION_FAILED)) {
-		domain->transition_failed = !is_delete;
-		return 0;
-	}
-	return tomoyo_write_domain2(data, domain, is_delete);
+	return tomoyo_write_domain2(ns, &domain->acl_info_list, data,
+				    is_delete);
 }
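
With the keyword changes above, a domain policy fragment accepted by
tomoyo_write_domain() now reads like this (domainname and pathname are
illustrative):

	<kernel> /usr/sbin/sshd
	use_profile 1
	use_group 0
	file read /etc/ssh/sshd_config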
 
 /**
- * tomoyo_fns - Find next set bit.
+ * tomoyo_print_condition - Print condition part.
  *
- * @perm: 8 bits value.
- * @bit:  First bit to find.
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @cond: Pointer to "struct tomoyo_condition".
  *
- * Returns next on-bit on success, 8 otherwise.
+ * Returns true on success, false otherwise.
  */
-static u8 tomoyo_fns(const u8 perm, u8 bit)
+static bool tomoyo_print_condition(struct tomoyo_io_buffer *head,
+				   const struct tomoyo_condition *cond)
 {
-	for ( ; bit < 8; bit++)
-		if (perm & (1 << bit))
+	switch (head->r.cond_step) {
+	case 0:
+		head->r.cond_index = 0;
+		head->r.cond_step++;
+		/* fall through */
+	case 1:
+		{
+			const u16 condc = cond->condc;
+			const struct tomoyo_condition_element *condp =
+				(typeof(condp)) (cond + 1);
+			const struct tomoyo_number_union *numbers_p =
+				(typeof(numbers_p)) (condp + condc);
+			const struct tomoyo_name_union *names_p =
+				(typeof(names_p))
+				(numbers_p + cond->numbers_count);
+			const struct tomoyo_argv *argv =
+				(typeof(argv)) (names_p + cond->names_count);
+			const struct tomoyo_envp *envp =
+				(typeof(envp)) (argv + cond->argc);
+			u16 skip;
+			for (skip = 0; skip < head->r.cond_index; skip++) {
+				const u8 left = condp->left;
+				const u8 right = condp->right;
+				condp++;
+				switch (left) {
+				case TOMOYO_ARGV_ENTRY:
+					argv++;
+					continue;
+				case TOMOYO_ENVP_ENTRY:
+					envp++;
+					continue;
+				case TOMOYO_NUMBER_UNION:
+					numbers_p++;
+					break;
+				}
+				switch (right) {
+				case TOMOYO_NAME_UNION:
+					names_p++;
+					break;
+				case TOMOYO_NUMBER_UNION:
+					numbers_p++;
+					break;
+				}
+			}
+			while (head->r.cond_index < condc) {
+				const u8 match = condp->equals;
+				const u8 left = condp->left;
+				const u8 right = condp->right;
+				if (!tomoyo_flush(head))
+					return false;
+				condp++;
+				head->r.cond_index++;
+				tomoyo_set_space(head);
+				switch (left) {
+				case TOMOYO_ARGV_ENTRY:
+					tomoyo_io_printf(head,
+							 "exec.argv[%lu]%s=\"",
+							 argv->index, argv->
+							 is_not ? "!" : "");
+					tomoyo_set_string(head,
+							  argv->value->name);
+					tomoyo_set_string(head, "\"");
+					argv++;
+					continue;
+				case TOMOYO_ENVP_ENTRY:
+					tomoyo_set_string(head,
+							  "exec.envp[\"");
+					tomoyo_set_string(head,
+							  envp->name->name);
+					tomoyo_io_printf(head, "\"]%s=", envp->
+							 is_not ? "!" : "");
+					if (envp->value) {
+						tomoyo_set_string(head, "\"");
+						tomoyo_set_string(head, envp->
+								  value->name);
+						tomoyo_set_string(head, "\"");
+					} else {
+						tomoyo_set_string(head,
+								  "NULL");
+					}
+					envp++;
+					continue;
+				case TOMOYO_NUMBER_UNION:
+					tomoyo_print_number_union_nospace
+						(head, numbers_p++);
+					break;
+				default:
+					tomoyo_set_string(head,
+					       tomoyo_condition_keyword[left]);
+					break;
+				}
+				tomoyo_set_string(head, match ? "=" : "!=");
+				switch (right) {
+				case TOMOYO_NAME_UNION:
+					tomoyo_print_name_union_quoted
+						(head, names_p++);
+					break;
+				case TOMOYO_NUMBER_UNION:
+					tomoyo_print_number_union_nospace
+						(head, numbers_p++);
+					break;
+				default:
+					tomoyo_set_string(head,
+					  tomoyo_condition_keyword[right]);
+					break;
+				}
+			}
+		}
+		head->r.cond_step++;
+		/* fall through */
+	case 2:
+		if (!tomoyo_flush(head))
 			break;
-	return bit;
+		head->r.cond_step++;
+		/* fall through */
+	case 3:
+		tomoyo_set_lf(head);
+		return true;
+	}
+	return false;
+}
+
+/**
+ * tomoyo_set_group - Print "acl_group " header keyword and category name.
+ *
+ * @head:     Pointer to "struct tomoyo_io_buffer".
+ * @category: Category name.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_set_group(struct tomoyo_io_buffer *head,
+			     const char *category)
+{
+	if (head->type == TOMOYO_EXCEPTIONPOLICY) {
+		tomoyo_print_namespace(head);
+		tomoyo_io_printf(head, "acl_group %u ",
+				 head->r.acl_group_index);
+	}
+	tomoyo_set_string(head, category);
 }
 
 /**
@@ -924,63 +1273,96 @@
 			       struct tomoyo_acl_info *acl)
 {
 	const u8 acl_type = acl->type;
+	bool first = true;
 	u8 bit;
 
+	if (head->r.print_cond_part)
+		goto print_cond_part;
 	if (acl->is_deleted)
 		return true;
- next:
-	bit = head->r.bit;
 	if (!tomoyo_flush(head))
 		return false;
 	else if (acl_type == TOMOYO_TYPE_PATH_ACL) {
 		struct tomoyo_path_acl *ptr =
 			container_of(acl, typeof(*ptr), head);
 		const u16 perm = ptr->perm;
-		for ( ; bit < TOMOYO_MAX_PATH_OPERATION; bit++) {
+		for (bit = 0; bit < TOMOYO_MAX_PATH_OPERATION; bit++) {
 			if (!(perm & (1 << bit)))
 				continue;
-			if (head->r.print_execute_only &&
+			if (head->r.print_transition_related_only &&
 			    bit != TOMOYO_TYPE_EXECUTE)
 				continue;
-			/* Print "read/write" instead of "read" and "write". */
-			if ((bit == TOMOYO_TYPE_READ ||
-			     bit == TOMOYO_TYPE_WRITE)
-			    && (perm & (1 << TOMOYO_TYPE_READ_WRITE)))
-				continue;
-			break;
+			if (first) {
+				tomoyo_set_group(head, "file ");
+				first = false;
+			} else {
+				tomoyo_set_slash(head);
+			}
+			tomoyo_set_string(head, tomoyo_path_keyword[bit]);
 		}
-		if (bit >= TOMOYO_MAX_PATH_OPERATION)
-			goto done;
-		tomoyo_io_printf(head, "allow_%s", tomoyo_path_keyword[bit]);
+		if (first)
+			return true;
 		tomoyo_print_name_union(head, &ptr->name);
-	} else if (head->r.print_execute_only) {
+	} else if (head->r.print_transition_related_only) {
 		return true;
 	} else if (acl_type == TOMOYO_TYPE_PATH2_ACL) {
 		struct tomoyo_path2_acl *ptr =
 			container_of(acl, typeof(*ptr), head);
-		bit = tomoyo_fns(ptr->perm, bit);
-		if (bit >= TOMOYO_MAX_PATH2_OPERATION)
-			goto done;
-		tomoyo_io_printf(head, "allow_%s", tomoyo_path2_keyword[bit]);
+		const u8 perm = ptr->perm;
+		for (bit = 0; bit < TOMOYO_MAX_PATH2_OPERATION; bit++) {
+			if (!(perm & (1 << bit)))
+				continue;
+			if (first) {
+				tomoyo_set_group(head, "file ");
+				first = false;
+			} else {
+				tomoyo_set_slash(head);
+			}
+			tomoyo_set_string(head, tomoyo_mac_keywords
+					  [tomoyo_pp2mac[bit]]);
+		}
+		if (first)
+			return true;
 		tomoyo_print_name_union(head, &ptr->name1);
 		tomoyo_print_name_union(head, &ptr->name2);
 	} else if (acl_type == TOMOYO_TYPE_PATH_NUMBER_ACL) {
 		struct tomoyo_path_number_acl *ptr =
 			container_of(acl, typeof(*ptr), head);
-		bit = tomoyo_fns(ptr->perm, bit);
-		if (bit >= TOMOYO_MAX_PATH_NUMBER_OPERATION)
-			goto done;
-		tomoyo_io_printf(head, "allow_%s",
-				 tomoyo_path_number_keyword[bit]);
+		const u8 perm = ptr->perm;
+		for (bit = 0; bit < TOMOYO_MAX_PATH_NUMBER_OPERATION; bit++) {
+			if (!(perm & (1 << bit)))
+				continue;
+			if (first) {
+				tomoyo_set_group(head, "file ");
+				first = false;
+			} else {
+				tomoyo_set_slash(head);
+			}
+			tomoyo_set_string(head, tomoyo_mac_keywords
+					  [tomoyo_pn2mac[bit]]);
+		}
+		if (first)
+			return true;
 		tomoyo_print_name_union(head, &ptr->name);
 		tomoyo_print_number_union(head, &ptr->number);
 	} else if (acl_type == TOMOYO_TYPE_MKDEV_ACL) {
 		struct tomoyo_mkdev_acl *ptr =
 			container_of(acl, typeof(*ptr), head);
-		bit = tomoyo_fns(ptr->perm, bit);
-		if (bit >= TOMOYO_MAX_MKDEV_OPERATION)
-			goto done;
-		tomoyo_io_printf(head, "allow_%s", tomoyo_mkdev_keyword[bit]);
+		const u8 perm = ptr->perm;
+		for (bit = 0; bit < TOMOYO_MAX_MKDEV_OPERATION; bit++) {
+			if (!(perm & (1 << bit)))
+				continue;
+			if (first) {
+				tomoyo_set_group(head, "file ");
+				first = false;
+			} else {
+				tomoyo_set_slash(head);
+			}
+			tomoyo_set_string(head, tomoyo_mac_keywords
+					  [tomoyo_pnnn2mac[bit]]);
+		}
+		if (first)
+			return true;
 		tomoyo_print_name_union(head, &ptr->name);
 		tomoyo_print_number_union(head, &ptr->mode);
 		tomoyo_print_number_union(head, &ptr->major);
@@ -988,35 +1370,41 @@
 	} else if (acl_type == TOMOYO_TYPE_MOUNT_ACL) {
 		struct tomoyo_mount_acl *ptr =
 			container_of(acl, typeof(*ptr), head);
-		tomoyo_io_printf(head, "allow_mount");
+		tomoyo_set_group(head, "file mount");
 		tomoyo_print_name_union(head, &ptr->dev_name);
 		tomoyo_print_name_union(head, &ptr->dir_name);
 		tomoyo_print_name_union(head, &ptr->fs_type);
 		tomoyo_print_number_union(head, &ptr->flags);
 	}
-	head->r.bit = bit + 1;
-	tomoyo_io_printf(head, "\n");
-	if (acl_type != TOMOYO_TYPE_MOUNT_ACL)
-		goto next;
- done:
-	head->r.bit = 0;
+	if (acl->cond) {
+		head->r.print_cond_part = true;
+		head->r.cond_step = 0;
+		if (!tomoyo_flush(head))
+			return false;
+print_cond_part:
+		if (!tomoyo_print_condition(head, acl->cond))
+			return false;
+		head->r.print_cond_part = false;
+	} else {
+		tomoyo_set_lf(head);
+	}
 	return true;
 }
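
The loops above replace the old one-line-per-bit "allow_*" output with grouped
"file " lines whose keywords are joined by '/'. For illustration (a sketch;
the path and permission bits are invented), a path ACL with the read and write
bits set now serializes as:

	file read/write /etc/fstab

where the same entry formerly produced separate "allow_read /etc/fstab" and
"allow_write /etc/fstab" lines.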
 
 /**
  * tomoyo_read_domain2 - Read domain policy.
  *
- * @head:   Pointer to "struct tomoyo_io_buffer".
- * @domain: Pointer to "struct tomoyo_domain_info".
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @list: Pointer to "struct list_head".
  *
  * Caller holds tomoyo_read_lock().
  *
  * Returns true on success, false otherwise.
  */
 static bool tomoyo_read_domain2(struct tomoyo_io_buffer *head,
-				struct tomoyo_domain_info *domain)
+				struct list_head *list)
 {
-	list_for_each_cookie(head->r.acl, &domain->acl_info_list) {
+	list_for_each_cookie(head->r.acl, list) {
 		struct tomoyo_acl_info *ptr =
 			list_entry(head->r.acl, typeof(*ptr), list);
 		if (!tomoyo_print_entry(head, ptr))
@@ -1041,6 +1429,7 @@
 		struct tomoyo_domain_info *domain =
 			list_entry(head->r.domain, typeof(*domain), list);
 		switch (head->r.step) {
+			u8 i;
 		case 0:
 			if (domain->is_deleted &&
 			    !head->r.print_this_domain_only)
@@ -1048,22 +1437,18 @@
 			/* Print domainname and flags. */
 			tomoyo_set_string(head, domain->domainname->name);
 			tomoyo_set_lf(head);
-			tomoyo_io_printf(head,
-					 TOMOYO_KEYWORD_USE_PROFILE "%u\n",
+			tomoyo_io_printf(head, "use_profile %u\n",
 					 domain->profile);
-			if (domain->quota_warned)
-				tomoyo_set_string(head, "quota_exceeded\n");
-			if (domain->transition_failed)
-				tomoyo_set_string(head, "transition_failed\n");
-			if (domain->ignore_global_allow_read)
-				tomoyo_set_string(head,
-				       TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ
-						  "\n");
+			tomoyo_io_printf(head, "use_group %u\n",
+					 domain->group);
+			for (i = 0; i < TOMOYO_MAX_DOMAIN_INFO_FLAGS; i++)
+				if (domain->flags[i])
+					tomoyo_set_string(head, tomoyo_dif[i]);
 			head->r.step++;
 			tomoyo_set_lf(head);
 			/* fall through */
 		case 1:
-			if (!tomoyo_read_domain2(head, domain))
+			if (!tomoyo_read_domain2(head, &domain->acl_info_list))
 				return;
 			head->r.step++;
 			if (!tomoyo_set_lf(head))
@@ -1080,73 +1465,6 @@
 }
 
 /**
- * tomoyo_write_domain_profile - Assign profile for specified domain.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns 0 on success, -EINVAL otherwise.
- *
- * This is equivalent to doing
- *
- *     ( echo "select " $domainname; echo "use_profile " $profile ) |
- *     /usr/sbin/tomoyo-loadpolicy -d
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_write_domain_profile(struct tomoyo_io_buffer *head)
-{
-	char *data = head->write_buf;
-	char *cp = strchr(data, ' ');
-	struct tomoyo_domain_info *domain;
-	unsigned long profile;
-
-	if (!cp)
-		return -EINVAL;
-	*cp = '\0';
-	domain = tomoyo_find_domain(cp + 1);
-	if (strict_strtoul(data, 10, &profile))
-		return -EINVAL;
-	if (domain && profile < TOMOYO_MAX_PROFILES
-	    && (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded))
-		domain->profile = (u8) profile;
-	return 0;
-}
-
-/**
- * tomoyo_read_domain_profile - Read only domainname and profile.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns list of profile number and domainname pairs.
- *
- * This is equivalent to doing
- *
- *     grep -A 1 '^<kernel>' /sys/kernel/security/tomoyo/domain_policy |
- *     awk ' { if ( domainname == "" ) { if ( $1 == "<kernel>" )
- *     domainname = $0; } else if ( $1 == "use_profile" ) {
- *     print $2 " " domainname; domainname = ""; } } ; '
- *
- * Caller holds tomoyo_read_lock().
- */
-static void tomoyo_read_domain_profile(struct tomoyo_io_buffer *head)
-{
-	if (head->r.eof)
-		return;
-	list_for_each_cookie(head->r.domain, &tomoyo_domain_list) {
-		struct tomoyo_domain_info *domain =
-			list_entry(head->r.domain, typeof(*domain), list);
-		if (domain->is_deleted)
-			continue;
-		if (!tomoyo_flush(head))
-			return;
-		tomoyo_io_printf(head, "%u ", domain->profile);
-		tomoyo_set_string(head, domain->domainname->name);
-		tomoyo_set_lf(head);
-	}
-	head->r.eof = true;
-}
-
-/**
  * tomoyo_write_pid: Specify PID to obtain domainname.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
@@ -1204,18 +1522,20 @@
 	tomoyo_set_string(head, domain->domainname->name);
 }
 
+/* String table for domain transition control keywords. */
 static const char *tomoyo_transition_type[TOMOYO_MAX_TRANSITION_TYPE] = {
-	[TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE]
-	= TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN,
-	[TOMOYO_TRANSITION_CONTROL_INITIALIZE]
-	= TOMOYO_KEYWORD_INITIALIZE_DOMAIN,
-	[TOMOYO_TRANSITION_CONTROL_NO_KEEP] = TOMOYO_KEYWORD_NO_KEEP_DOMAIN,
-	[TOMOYO_TRANSITION_CONTROL_KEEP] = TOMOYO_KEYWORD_KEEP_DOMAIN
+	[TOMOYO_TRANSITION_CONTROL_NO_RESET]      = "no_reset_domain ",
+	[TOMOYO_TRANSITION_CONTROL_RESET]         = "reset_domain ",
+	[TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE] = "no_initialize_domain ",
+	[TOMOYO_TRANSITION_CONTROL_INITIALIZE]    = "initialize_domain ",
+	[TOMOYO_TRANSITION_CONTROL_NO_KEEP]       = "no_keep_domain ",
+	[TOMOYO_TRANSITION_CONTROL_KEEP]          = "keep_domain ",
 };
 
+/* String table for grouping keywords. */
 static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = {
-	[TOMOYO_PATH_GROUP] = TOMOYO_KEYWORD_PATH_GROUP,
-	[TOMOYO_NUMBER_GROUP] = TOMOYO_KEYWORD_NUMBER_GROUP
+	[TOMOYO_PATH_GROUP]   = "path_group ",
+	[TOMOYO_NUMBER_GROUP] = "number_group ",
 };
 
 /**
@@ -1229,29 +1549,30 @@
  */
 static int tomoyo_write_exception(struct tomoyo_io_buffer *head)
 {
-	char *data = head->write_buf;
-	bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE);
-	u8 i;
-	static const struct {
-		const char *keyword;
-		int (*write) (char *, const bool);
-	} tomoyo_callback[4] = {
-		{ TOMOYO_KEYWORD_AGGREGATOR, tomoyo_write_aggregator },
-		{ TOMOYO_KEYWORD_FILE_PATTERN, tomoyo_write_pattern },
-		{ TOMOYO_KEYWORD_DENY_REWRITE, tomoyo_write_no_rewrite },
-		{ TOMOYO_KEYWORD_ALLOW_READ, tomoyo_write_globally_readable },
+	const bool is_delete = head->w.is_delete;
+	struct tomoyo_acl_param param = {
+		.ns = head->w.ns,
+		.is_delete = is_delete,
+		.data = head->write_buf,
 	};
-
+	u8 i;
+	if (tomoyo_str_starts(&param.data, "aggregator "))
+		return tomoyo_write_aggregator(&param);
 	for (i = 0; i < TOMOYO_MAX_TRANSITION_TYPE; i++)
-		if (tomoyo_str_starts(&data, tomoyo_transition_type[i]))
-			return tomoyo_write_transition_control(data, is_delete,
-							       i);
-	for (i = 0; i < 4; i++)
-		if (tomoyo_str_starts(&data, tomoyo_callback[i].keyword))
-			return tomoyo_callback[i].write(data, is_delete);
+		if (tomoyo_str_starts(&param.data, tomoyo_transition_type[i]))
+			return tomoyo_write_transition_control(&param, i);
 	for (i = 0; i < TOMOYO_MAX_GROUP; i++)
-		if (tomoyo_str_starts(&data, tomoyo_group_name[i]))
-			return tomoyo_write_group(data, is_delete, i);
+		if (tomoyo_str_starts(&param.data, tomoyo_group_name[i]))
+			return tomoyo_write_group(&param, i);
+	if (tomoyo_str_starts(&param.data, "acl_group ")) {
+		unsigned int group;
+		char *data;
+		group = simple_strtoul(param.data, &data, 10);
+		if (group < TOMOYO_MAX_ACL_GROUPS && *data++ == ' ')
+			return tomoyo_write_domain2
+				(head->w.ns, &head->w.ns->acl_group[group],
+				 data, is_delete);
+	}
 	return -EINVAL;
 }
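
For illustration, exception-policy lines the dispatcher above would route
(keywords come from the tables above; the paths are invented and the argument
grammar is assumed from the read-side printers below):

	aggregator /usr/sbin/sshd /usr/sbin/sshd
	initialize_domain /usr/sbin/sshd from any
	path_group HOME-DIR /home/\*/
	acl_group 0 file read /etc/ld.so.cache

The first goes to tomoyo_write_aggregator(), the next two are matched against
tomoyo_transition_type[] and tomoyo_group_name[], and the last is parsed by
the "acl_group " branch and forwarded to tomoyo_write_domain2().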
 
@@ -1267,9 +1588,12 @@
  */
 static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx)
 {
-	list_for_each_cookie(head->r.group, &tomoyo_group_list[idx]) {
+	struct tomoyo_policy_namespace *ns =
+		container_of(head->r.ns, typeof(*ns), namespace_list);
+	struct list_head *list = &ns->group_list[idx];
+	list_for_each_cookie(head->r.group, list) {
 		struct tomoyo_group *group =
-			list_entry(head->r.group, typeof(*group), list);
+			list_entry(head->r.group, typeof(*group), head.list);
 		list_for_each_cookie(head->r.acl, &group->member_list) {
 			struct tomoyo_acl_head *ptr =
 				list_entry(head->r.acl, typeof(*ptr), list);
@@ -1277,6 +1601,7 @@
 				continue;
 			if (!tomoyo_flush(head))
 				return false;
+			tomoyo_print_namespace(head);
 			tomoyo_set_string(head, tomoyo_group_name[idx]);
 			tomoyo_set_string(head, group->group_name->name);
 			if (idx == TOMOYO_PATH_GROUP) {
@@ -1310,7 +1635,10 @@
  */
 static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx)
 {
-	list_for_each_cookie(head->r.acl, &tomoyo_policy_list[idx]) {
+	struct tomoyo_policy_namespace *ns =
+		container_of(head->r.ns, typeof(*ns), namespace_list);
+	struct list_head *list = &ns->policy_list[idx];
+	list_for_each_cookie(head->r.acl, list) {
 		struct tomoyo_acl_head *acl =
 			container_of(head->r.acl, typeof(*acl), list);
 		if (acl->is_deleted)
@@ -1322,35 +1650,23 @@
 			{
 				struct tomoyo_transition_control *ptr =
 					container_of(acl, typeof(*ptr), head);
-				tomoyo_set_string(head,
-						  tomoyo_transition_type
+				tomoyo_print_namespace(head);
+				tomoyo_set_string(head, tomoyo_transition_type
 						  [ptr->type]);
-				if (ptr->program)
-					tomoyo_set_string(head,
-							  ptr->program->name);
-				if (ptr->program && ptr->domainname)
-					tomoyo_set_string(head, " from ");
-				if (ptr->domainname)
-					tomoyo_set_string(head,
-							  ptr->domainname->
-							  name);
-			}
-			break;
-		case TOMOYO_ID_GLOBALLY_READABLE:
-			{
-				struct tomoyo_readable_file *ptr =
-					container_of(acl, typeof(*ptr), head);
-				tomoyo_set_string(head,
-						  TOMOYO_KEYWORD_ALLOW_READ);
-				tomoyo_set_string(head, ptr->filename->name);
+				tomoyo_set_string(head, ptr->program ?
+						  ptr->program->name : "any");
+				tomoyo_set_string(head, " from ");
+				tomoyo_set_string(head, ptr->domainname ?
+						  ptr->domainname->name :
+						  "any");
 			}
 			break;
 		case TOMOYO_ID_AGGREGATOR:
 			{
 				struct tomoyo_aggregator *ptr =
 					container_of(acl, typeof(*ptr), head);
-				tomoyo_set_string(head,
-						  TOMOYO_KEYWORD_AGGREGATOR);
+				tomoyo_print_namespace(head);
+				tomoyo_set_string(head, "aggregator ");
 				tomoyo_set_string(head,
 						  ptr->original_name->name);
 				tomoyo_set_space(head);
@@ -1358,24 +1674,6 @@
 					       ptr->aggregated_name->name);
 			}
 			break;
-		case TOMOYO_ID_PATTERN:
-			{
-				struct tomoyo_no_pattern *ptr =
-					container_of(acl, typeof(*ptr), head);
-				tomoyo_set_string(head,
-						  TOMOYO_KEYWORD_FILE_PATTERN);
-				tomoyo_set_string(head, ptr->pattern->name);
-			}
-			break;
-		case TOMOYO_ID_NO_REWRITE:
-			{
-				struct tomoyo_no_rewrite *ptr =
-					container_of(acl, typeof(*ptr), head);
-				tomoyo_set_string(head,
-						  TOMOYO_KEYWORD_DENY_REWRITE);
-				tomoyo_set_string(head, ptr->pattern->name);
-			}
-			break;
 		default:
 			continue;
 		}
@@ -1394,6 +1692,8 @@
  */
 static void tomoyo_read_exception(struct tomoyo_io_buffer *head)
 {
+	struct tomoyo_policy_namespace *ns =
+		container_of(head->r.ns, typeof(*ns), namespace_list);
 	if (head->r.eof)
 		return;
 	while (head->r.step < TOMOYO_MAX_POLICY &&
@@ -1406,95 +1706,40 @@
 		head->r.step++;
 	if (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP)
 		return;
+	while (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP
+	       + TOMOYO_MAX_ACL_GROUPS) {
+		head->r.acl_group_index = head->r.step - TOMOYO_MAX_POLICY
+			- TOMOYO_MAX_GROUP;
+		if (!tomoyo_read_domain2(head, &ns->acl_group
+					 [head->r.acl_group_index]))
+			return;
+		head->r.step++;
+	}
 	head->r.eof = true;
 }
 
-/**
- * tomoyo_print_header - Get header line of audit log.
- *
- * @r: Pointer to "struct tomoyo_request_info".
- *
- * Returns string representation.
- *
- * This function uses kmalloc(), so caller must kfree() if this function
- * didn't return NULL.
- */
-static char *tomoyo_print_header(struct tomoyo_request_info *r)
-{
-	struct timeval tv;
-	const pid_t gpid = task_pid_nr(current);
-	static const int tomoyo_buffer_len = 4096;
-	char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS);
-	pid_t ppid;
-	if (!buffer)
-		return NULL;
-	do_gettimeofday(&tv);
-	rcu_read_lock();
-	ppid = task_tgid_vnr(current->real_parent);
-	rcu_read_unlock();
-	snprintf(buffer, tomoyo_buffer_len - 1,
-		 "#timestamp=%lu profile=%u mode=%s (global-pid=%u)"
-		 " task={ pid=%u ppid=%u uid=%u gid=%u euid=%u"
-		 " egid=%u suid=%u sgid=%u fsuid=%u fsgid=%u }",
-		 tv.tv_sec, r->profile, tomoyo_mode[r->mode], gpid,
-		 task_tgid_vnr(current), ppid,
-		 current_uid(), current_gid(), current_euid(),
-		 current_egid(), current_suid(), current_sgid(),
-		 current_fsuid(), current_fsgid());
-	return buffer;
-}
-
-/**
- * tomoyo_init_audit_log - Allocate buffer for audit logs.
- *
- * @len: Required size.
- * @r:   Pointer to "struct tomoyo_request_info".
- *
- * Returns pointer to allocated memory.
- *
- * The @len is updated to add the header lines' size on success.
- *
- * This function uses kzalloc(), so caller must kfree() if this function
- * didn't return NULL.
- */
-static char *tomoyo_init_audit_log(int *len, struct tomoyo_request_info *r)
-{
-	char *buf = NULL;
-	const char *header;
-	const char *domainname;
-	if (!r->domain)
-		r->domain = tomoyo_domain();
-	domainname = r->domain->domainname->name;
-	header = tomoyo_print_header(r);
-	if (!header)
-		return NULL;
-	*len += strlen(domainname) + strlen(header) + 10;
-	buf = kzalloc(*len, GFP_NOFS);
-	if (buf)
-		snprintf(buf, (*len) - 1, "%s\n%s\n", header, domainname);
-	kfree(header);
-	return buf;
-}
-
-/* Wait queue for tomoyo_query_list. */
+/* Wait queue for kernel -> userspace notification. */
 static DECLARE_WAIT_QUEUE_HEAD(tomoyo_query_wait);
-
-/* Lock for manipulating tomoyo_query_list. */
-static DEFINE_SPINLOCK(tomoyo_query_list_lock);
+/* Wait queue for userspace -> kernel notification. */
+static DECLARE_WAIT_QUEUE_HEAD(tomoyo_answer_wait);
 
 /* Structure for query. */
 struct tomoyo_query {
 	struct list_head list;
 	char *query;
-	int query_len;
+	size_t query_len;
 	unsigned int serial;
-	int timer;
-	int answer;
+	u8 timer;
+	u8 answer;
+	u8 retry;
 };
 
 /* The list for "struct tomoyo_query". */
 static LIST_HEAD(tomoyo_query_list);
 
+/* Lock for manipulating tomoyo_query_list. */
+static DEFINE_SPINLOCK(tomoyo_query_list_lock);
+
 /*
  * Number of "struct file" referring /sys/kernel/security/tomoyo/query
  * interface.
@@ -1502,10 +1747,82 @@
 static atomic_t tomoyo_query_observers = ATOMIC_INIT(0);
 
 /**
+ * tomoyo_truncate - Truncate a line.
+ *
+ * @str: String to truncate.
+ *
+ * Returns length of truncated @str.
+ */
+static int tomoyo_truncate(char *str)
+{
+	char *start = str;
+	while (*(unsigned char *) str > (unsigned char) ' ')
+		str++;
+	*str = '\0';
+	return strlen(start) + 1;
+}
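
A minimal worked example of tomoyo_truncate() (input invented): the scan stops
at the first byte whose value is not greater than ' ', so a quoted token
without embedded spaces survives intact.

	/*
	 * char buf[] = "\"/bin/cat\" remainder";
	 * tomoyo_truncate(buf);  returns 11, leaves buf == "\"/bin/cat\""
	 */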
+
+/**
+ * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode.
+ *
+ * @domain: Pointer to "struct tomoyo_domain_info".
+ * @header: Lines containing ACL.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header)
+{
+	char *buffer;
+	char *realpath = NULL;
+	char *argv0 = NULL;
+	char *symlink = NULL;
+	char *cp = strchr(header, '\n');
+	int len;
+	if (!cp)
+		return;
+	cp = strchr(cp + 1, '\n');
+	if (!cp)
+		return;
+	*cp++ = '\0';
+	len = strlen(cp) + 1;
+	/* strstr() will return NULL if ordering is wrong. */
+	if (*cp == 'f') {
+		argv0 = strstr(header, " argv[]={ \"");
+		if (argv0) {
+			argv0 += 10;
+			len += tomoyo_truncate(argv0) + 14;
+		}
+		realpath = strstr(header, " exec={ realpath=\"");
+		if (realpath) {
+			realpath += 8;
+			len += tomoyo_truncate(realpath) + 6;
+		}
+		symlink = strstr(header, " symlink.target=\"");
+		if (symlink)
+			len += tomoyo_truncate(symlink + 1) + 1;
+	}
+	buffer = kmalloc(len, GFP_NOFS);
+	if (!buffer)
+		return;
+	snprintf(buffer, len - 1, "%s", cp);
+	if (realpath)
+		tomoyo_addprintf(buffer, len, " exec.%s", realpath);
+	if (argv0)
+		tomoyo_addprintf(buffer, len, " exec.argv[0]=%s", argv0);
+	if (symlink)
+		tomoyo_addprintf(buffer, len, "%s", symlink);
+	tomoyo_normalize_line(buffer);
+	if (!tomoyo_write_domain2(domain->ns, &domain->acl_info_list, buffer,
+				  false))
+		tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
+	kfree(buffer);
+}
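
tomoyo_add_entry() splits the message at its second newline: the first two
lines (audit header and domainname) are mined for exec.* attributes, and the
third line is the ACL body. A sketch of the transformation, with invented
field values:

	/*
	 * Input:
	 *   #timestamp=... exec={ realpath="/bin/cat" } argv[]={ "cat" }
	 *   <kernel> /bin/sh
	 *   file execute /bin/cat
	 * Line appended to the domain's ACL list:
	 *   file execute /bin/cat exec.realpath="/bin/cat" exec.argv[0]="cat"
	 */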
+
+/**
  * tomoyo_supervisor - Ask for the supervisor's decision.
  *
- * @r:       Pointer to "struct tomoyo_request_info".
- * @fmt:     The printf()'s format string, followed by parameters.
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @fmt: The printf()'s format string, followed by parameters.
  *
  * Returns 0 if the supervisor decided to permit the access request which
  * violated the policy in enforcing mode, TOMOYO_RETRY_REQUEST if the
@@ -1515,88 +1832,79 @@
 int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
 {
 	va_list args;
-	int error = -EPERM;
-	int pos;
+	int error;
 	int len;
 	static unsigned int tomoyo_serial;
-	struct tomoyo_query *entry = NULL;
+	struct tomoyo_query entry = { };
 	bool quota_exceeded = false;
-	char *header;
+	va_start(args, fmt);
+	len = vsnprintf((char *) &len, 1, fmt, args) + 1;
+	va_end(args);
+	/* Write /sys/kernel/security/tomoyo/audit. */
+	va_start(args, fmt);
+	tomoyo_write_log2(r, len, fmt, args);
+	va_end(args);
+	/* Nothing more to do if granted. */
+	if (r->granted)
+		return 0;
+	if (r->mode)
+		tomoyo_update_stat(r->mode);
 	switch (r->mode) {
-		char *buffer;
+	case TOMOYO_CONFIG_ENFORCING:
+		error = -EPERM;
+		if (atomic_read(&tomoyo_query_observers))
+			break;
+		goto out;
 	case TOMOYO_CONFIG_LEARNING:
-		if (!tomoyo_domain_quota_is_ok(r))
-			return 0;
-		va_start(args, fmt);
-		len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 4;
-		va_end(args);
-		buffer = kmalloc(len, GFP_NOFS);
-		if (!buffer)
-			return 0;
-		va_start(args, fmt);
-		vsnprintf(buffer, len - 1, fmt, args);
-		va_end(args);
-		tomoyo_normalize_line(buffer);
-		tomoyo_write_domain2(buffer, r->domain, false);
-		kfree(buffer);
+		error = 0;
+		/* Check max_learning_entry parameter. */
+		if (tomoyo_domain_quota_is_ok(r))
+			break;
 		/* fall through */
-	case TOMOYO_CONFIG_PERMISSIVE:
+	default:
 		return 0;
 	}
-	if (!r->domain)
-		r->domain = tomoyo_domain();
-	if (!atomic_read(&tomoyo_query_observers))
-		return -EPERM;
+	/* Get message. */
 	va_start(args, fmt);
-	len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 32;
+	entry.query = tomoyo_init_log(r, len, fmt, args);
 	va_end(args);
-	header = tomoyo_init_audit_log(&len, r);
-	if (!header)
+	if (!entry.query)
 		goto out;
-	entry = kzalloc(sizeof(*entry), GFP_NOFS);
-	if (!entry)
+	entry.query_len = strlen(entry.query) + 1;
+	if (!error) {
+		tomoyo_add_entry(r->domain, entry.query);
 		goto out;
-	entry->query = kzalloc(len, GFP_NOFS);
-	if (!entry->query)
-		goto out;
-	len = ksize(entry->query);
+	}
+	len = tomoyo_round2(entry.query_len);
 	spin_lock(&tomoyo_query_list_lock);
-	if (tomoyo_quota_for_query && tomoyo_query_memory_size + len +
-	    sizeof(*entry) >= tomoyo_quota_for_query) {
+	if (tomoyo_memory_quota[TOMOYO_MEMORY_QUERY] &&
+	    tomoyo_memory_used[TOMOYO_MEMORY_QUERY] + len
+	    >= tomoyo_memory_quota[TOMOYO_MEMORY_QUERY]) {
 		quota_exceeded = true;
 	} else {
-		tomoyo_query_memory_size += len + sizeof(*entry);
-		entry->serial = tomoyo_serial++;
+		entry.serial = tomoyo_serial++;
+		entry.retry = r->retry;
+		tomoyo_memory_used[TOMOYO_MEMORY_QUERY] += len;
+		list_add_tail(&entry.list, &tomoyo_query_list);
 	}
 	spin_unlock(&tomoyo_query_list_lock);
 	if (quota_exceeded)
 		goto out;
-	pos = snprintf(entry->query, len - 1, "Q%u-%hu\n%s",
-		       entry->serial, r->retry, header);
-	kfree(header);
-	header = NULL;
-	va_start(args, fmt);
-	vsnprintf(entry->query + pos, len - 1 - pos, fmt, args);
-	entry->query_len = strlen(entry->query) + 1;
-	va_end(args);
-	spin_lock(&tomoyo_query_list_lock);
-	list_add_tail(&entry->list, &tomoyo_query_list);
-	spin_unlock(&tomoyo_query_list_lock);
 	/* Give 10 seconds for supervisor's opinion. */
-	for (entry->timer = 0;
-	     atomic_read(&tomoyo_query_observers) && entry->timer < 100;
-	     entry->timer++) {
-		wake_up(&tomoyo_query_wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 10);
-		if (entry->answer)
+	while (entry.timer < 10) {
+		wake_up_all(&tomoyo_query_wait);
+		if (wait_event_interruptible_timeout
+		    (tomoyo_answer_wait, entry.answer ||
+		     !atomic_read(&tomoyo_query_observers), HZ))
 			break;
+		else
+			entry.timer++;
 	}
 	spin_lock(&tomoyo_query_list_lock);
-	list_del(&entry->list);
-	tomoyo_query_memory_size -= len + sizeof(*entry);
+	list_del(&entry.list);
+	tomoyo_memory_used[TOMOYO_MEMORY_QUERY] -= len;
 	spin_unlock(&tomoyo_query_list_lock);
-	switch (entry->answer) {
+	switch (entry.answer) {
 	case 3: /* Asked to retry by administrator. */
 		error = TOMOYO_RETRY_REQUEST;
 		r->retry++;
@@ -1605,18 +1913,12 @@
 		/* Granted by administrator. */
 		error = 0;
 		break;
-	case 0:
-		/* Timed out. */
-		break;
 	default:
-		/* Rejected by administrator. */
+		/* Timed out or rejected by administrator. */
 		break;
 	}
- out:
-	if (entry)
-		kfree(entry->query);
-	kfree(entry);
-	kfree(header);
+out:
+	kfree(entry.query);
 	return error;
 }
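
A hypothetical caller sketch (not an in-tree function): a check in enforcing
mode is repeated while the administrator answers "retry" (3), which
tomoyo_supervisor() maps to TOMOYO_RETRY_REQUEST after bumping r->retry.
@r and @filename are assumed to be set up by the caller, and the format
string is illustrative.

	static int example_check(struct tomoyo_request_info *r,
				 const struct tomoyo_path_info *filename)
	{
		int error;
		do {
			error = tomoyo_supervisor(r, "file unlink %s\n",
						  filename->name);
		} while (error == TOMOYO_RETRY_REQUEST);
		return error;
	}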
 
@@ -1663,8 +1965,8 @@
 static void tomoyo_read_query(struct tomoyo_io_buffer *head)
 {
 	struct list_head *tmp;
-	int pos = 0;
-	int len = 0;
+	unsigned int pos = 0;
+	size_t len = 0;
 	char *buf;
 	if (head->r.w_pos)
 		return;
@@ -1687,7 +1989,7 @@
 		head->r.query_index = 0;
 		return;
 	}
-	buf = kzalloc(len, GFP_NOFS);
+	buf = kzalloc(len + 32, GFP_NOFS);
 	if (!buf)
 		return;
 	pos = 0;
@@ -1703,7 +2005,8 @@
 		 * can change, but I don't care.
 		 */
 		if (len == ptr->query_len)
-			memmove(buf, ptr->query, len);
+			snprintf(buf, len + 31, "Q%u-%hu\n%s", ptr->serial,
+				 ptr->retry, ptr->query);
 		break;
 	}
 	spin_unlock(&tomoyo_query_list_lock);
@@ -1760,7 +2063,7 @@
 static void tomoyo_read_version(struct tomoyo_io_buffer *head)
 {
 	if (!head->r.eof) {
-		tomoyo_io_printf(head, "2.3.0");
+		tomoyo_io_printf(head, "2.4.0");
 		head->r.eof = true;
 	}
 }
@@ -1785,15 +2088,111 @@
 	}
 }
 
+/* String table for /sys/kernel/security/tomoyo/stat interface. */
+static const char * const tomoyo_policy_headers[TOMOYO_MAX_POLICY_STAT] = {
+	[TOMOYO_STAT_POLICY_UPDATES]    = "update:",
+	[TOMOYO_STAT_POLICY_LEARNING]   = "violation in learning mode:",
+	[TOMOYO_STAT_POLICY_PERMISSIVE] = "violation in permissive mode:",
+	[TOMOYO_STAT_POLICY_ENFORCING]  = "violation in enforcing mode:",
+};
+
+/* String table for /sys/kernel/security/tomoyo/stat interface. */
+static const char * const tomoyo_memory_headers[TOMOYO_MAX_MEMORY_STAT] = {
+	[TOMOYO_MEMORY_POLICY] = "policy:",
+	[TOMOYO_MEMORY_AUDIT]  = "audit log:",
+	[TOMOYO_MEMORY_QUERY]  = "query message:",
+};
+
+/* Timestamp counter for last updated. */
+static unsigned int tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT];
+/* Counter for number of updates. */
+static unsigned int tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT];
+
+/**
+ * tomoyo_update_stat - Update statistic counters.
+ *
+ * @index: Index for policy type.
+ *
+ * Returns nothing.
+ */
+void tomoyo_update_stat(const u8 index)
+{
+	struct timeval tv;
+	do_gettimeofday(&tv);
+	/*
+	 * I don't use atomic operations because a race condition is not fatal.
+	 */
+	tomoyo_stat_updated[index]++;
+	tomoyo_stat_modified[index] = tv.tv_sec;
+}
+
+/**
+ * tomoyo_read_stat - Read statistic data.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_read_stat(struct tomoyo_io_buffer *head)
+{
+	u8 i;
+	unsigned int total = 0;
+	if (head->r.eof)
+		return;
+	for (i = 0; i < TOMOYO_MAX_POLICY_STAT; i++) {
+		tomoyo_io_printf(head, "Policy %-30s %10u",
+				 tomoyo_policy_headers[i],
+				 tomoyo_stat_updated[i]);
+		if (tomoyo_stat_modified[i]) {
+			struct tomoyo_time stamp;
+			tomoyo_convert_time(tomoyo_stat_modified[i], &stamp);
+			tomoyo_io_printf(head, " (Last: %04u/%02u/%02u "
+					 "%02u:%02u:%02u)",
+					 stamp.year, stamp.month, stamp.day,
+					 stamp.hour, stamp.min, stamp.sec);
+		}
+		tomoyo_set_lf(head);
+	}
+	for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) {
+		unsigned int used = tomoyo_memory_used[i];
+		total += used;
+		tomoyo_io_printf(head, "Memory used by %-22s %10u",
+				 tomoyo_memory_headers[i], used);
+		used = tomoyo_memory_quota[i];
+		if (used)
+			tomoyo_io_printf(head, " (Quota: %10u)", used);
+		tomoyo_set_lf(head);
+	}
+	tomoyo_io_printf(head, "Total memory used:                    %10u\n",
+			 total);
+	head->r.eof = true;
+}
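
A sketch of the resulting /sys/kernel/security/tomoyo/stat output (counts,
timestamps and exact column alignment invented; the formats come from the
printf calls above):

	Policy update:                            42 (Last: 2011/06/09 12:34:56)
	Policy violation in learning mode:         7 (Last: 2011/06/09 12:30:00)
	Policy violation in permissive mode:       0
	Policy violation in enforcing mode:        0
	Memory used by policy:                 65536
	Memory used by audit log:                  0
	Memory used by query message:              0 (Quota:    1048576)
	Total memory used:                     65536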
+
+/**
+ * tomoyo_write_stat - Set memory quota.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns 0.
+ */
+static int tomoyo_write_stat(struct tomoyo_io_buffer *head)
+{
+	char *data = head->write_buf;
+	u8 i;
+	if (tomoyo_str_starts(&data, "Memory used by "))
+		for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++)
+			if (tomoyo_str_starts(&data, tomoyo_memory_headers[i]))
+				sscanf(data, "%u", &tomoyo_memory_quota[i]);
+	return 0;
+}
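
Quotas are set by writing back a line shaped like the read-side output. A
hedged userspace sketch (standard C; the securityfs path is the one used
throughout this file):

	#include <stdio.h>

	int main(void)
	{
		FILE *fp = fopen("/sys/kernel/security/tomoyo/stat", "w");
		if (!fp)
			return 1;
		/* "Memory used by " + tomoyo_memory_headers[i] + " <bytes>" */
		fprintf(fp, "Memory used by query message: 1048576\n");
		return fclose(fp) ? 1 : 0;
	}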
+
 /**
  * tomoyo_open_control - open() for /sys/kernel/security/tomoyo/ interface.
  *
  * @type: Type of interface.
  * @file: Pointer to "struct file".
  *
- * Associates policy handler and returns 0 on success, -ENOMEM otherwise.
- *
- * Caller acquires tomoyo_read_lock().
+ * Returns 0 on success, negative value otherwise.
  */
 int tomoyo_open_control(const u8 type, struct file *file)
 {
@@ -1814,15 +2213,15 @@
 		head->write = tomoyo_write_exception;
 		head->read = tomoyo_read_exception;
 		break;
+	case TOMOYO_AUDIT:
+		/* /sys/kernel/security/tomoyo/audit */
+		head->poll = tomoyo_poll_log;
+		head->read = tomoyo_read_log;
+		break;
 	case TOMOYO_SELFDOMAIN:
 		/* /sys/kernel/security/tomoyo/self_domain */
 		head->read = tomoyo_read_self_domain;
 		break;
-	case TOMOYO_DOMAIN_STATUS:
-		/* /sys/kernel/security/tomoyo/.domain_status */
-		head->write = tomoyo_write_domain_profile;
-		head->read = tomoyo_read_domain_profile;
-		break;
 	case TOMOYO_PROCESS_STATUS:
 		/* /sys/kernel/security/tomoyo/.process_status */
 		head->write = tomoyo_write_pid;
@@ -1833,11 +2232,11 @@
 		head->read = tomoyo_read_version;
 		head->readbuf_size = 128;
 		break;
-	case TOMOYO_MEMINFO:
-		/* /sys/kernel/security/tomoyo/meminfo */
-		head->write = tomoyo_write_memory_quota;
-		head->read = tomoyo_read_memory_counter;
-		head->readbuf_size = 512;
+	case TOMOYO_STAT:
+		/* /sys/kernel/security/tomoyo/stat */
+		head->write = tomoyo_write_stat;
+		head->read = tomoyo_read_stat;
+		head->readbuf_size = 1024;
 		break;
 	case TOMOYO_PROFILE:
 		/* /sys/kernel/security/tomoyo/profile */
@@ -1887,26 +2286,16 @@
 			return -ENOMEM;
 		}
 	}
-	if (type != TOMOYO_QUERY)
-		head->reader_idx = tomoyo_read_lock();
-	file->private_data = head;
-	/*
-	 * Call the handler now if the file is
-	 * /sys/kernel/security/tomoyo/self_domain
-	 * so that the user can use
-	 * cat < /sys/kernel/security/tomoyo/self_domain"
-	 * to know the current process's domainname.
-	 */
-	if (type == TOMOYO_SELFDOMAIN)
-		tomoyo_read_control(file, NULL, 0);
 	/*
 	 * If the file is /sys/kernel/security/tomoyo/query , increment the
 	 * observer counter.
 	 * The observer counter is used by tomoyo_supervisor() to see if
 	 * there is some process monitoring /sys/kernel/security/tomoyo/query.
 	 */
-	else if (type == TOMOYO_QUERY)
+	if (type == TOMOYO_QUERY)
 		atomic_inc(&tomoyo_query_observers);
+	file->private_data = head;
+	tomoyo_notify_gc(head, true);
 	return 0;
 }
 
@@ -1917,7 +2306,8 @@
  * @wait: Pointer to "poll_table".
  *
  * Waits for read readiness.
- * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd .
+ * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd and
+ * /sys/kernel/security/tomoyo/audit is handled by /usr/sbin/tomoyo-auditd.
  */
 int tomoyo_poll_control(struct file *file, poll_table *wait)
 {
@@ -1928,21 +2318,58 @@
 }
 
 /**
+ * tomoyo_set_namespace_cursor - Set namespace to read.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static inline void tomoyo_set_namespace_cursor(struct tomoyo_io_buffer *head)
+{
+	struct list_head *ns;
+	if (head->type != TOMOYO_EXCEPTIONPOLICY &&
+	    head->type != TOMOYO_PROFILE)
+		return;
+	/*
+	 * If this is the first read, or reading previous namespace finished
+	 * and has more namespaces to read, update the namespace cursor.
+	 */
+	ns = head->r.ns;
+	if (!ns || (head->r.eof && ns->next != &tomoyo_namespace_list)) {
+		/* Clearing is OK because tomoyo_flush() returned true. */
+		memset(&head->r, 0, sizeof(head->r));
+		head->r.ns = ns ? ns->next : tomoyo_namespace_list.next;
+	}
+}
+
+/**
+ * tomoyo_has_more_namespace - Check for unread namespaces.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns true if we have more entries to print, false otherwise.
+ */
+static inline bool tomoyo_has_more_namespace(struct tomoyo_io_buffer *head)
+{
+	return (head->type == TOMOYO_EXCEPTIONPOLICY ||
+		head->type == TOMOYO_PROFILE) && head->r.eof &&
+		head->r.ns->next != &tomoyo_namespace_list;
+}
+
+/**
  * tomoyo_read_control - read() for /sys/kernel/security/tomoyo/ interface.
  *
- * @file:       Pointer to "struct file".
+ * @head:       Pointer to "struct tomoyo_io_buffer".
  * @buffer:     Pointer to buffer to write to.
  * @buffer_len: Size of @buffer.
  *
  * Returns bytes read on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
  */
-int tomoyo_read_control(struct file *file, char __user *buffer,
-			const int buffer_len)
+ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer,
+			    const int buffer_len)
 {
 	int len;
-	struct tomoyo_io_buffer *head = file->private_data;
+	int idx;
 
 	if (!head->read)
 		return -ENOSYS;
@@ -1950,64 +2377,156 @@
 		return -EINTR;
 	head->read_user_buf = buffer;
 	head->read_user_buf_avail = buffer_len;
+	idx = tomoyo_read_lock();
 	if (tomoyo_flush(head))
 		/* Call the policy handler. */
-		head->read(head);
-	tomoyo_flush(head);
+		do {
+			tomoyo_set_namespace_cursor(head);
+			head->read(head);
+		} while (tomoyo_flush(head) &&
+			 tomoyo_has_more_namespace(head));
+	tomoyo_read_unlock(idx);
 	len = head->read_user_buf - buffer;
 	mutex_unlock(&head->io_sem);
 	return len;
 }
 
 /**
+ * tomoyo_parse_policy - Parse a policy line.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @line: Line to parse.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+static int tomoyo_parse_policy(struct tomoyo_io_buffer *head, char *line)
+{
+	/* Delete request? */
+	head->w.is_delete = !strncmp(line, "delete ", 7);
+	if (head->w.is_delete)
+		memmove(line, line + 7, strlen(line + 7) + 1);
+	/* Selecting namespace to update. */
+	if (head->type == TOMOYO_EXCEPTIONPOLICY ||
+	    head->type == TOMOYO_PROFILE) {
+		if (*line == '<') {
+			char *cp = strchr(line, ' ');
+			if (cp) {
+				*cp++ = '\0';
+				head->w.ns = tomoyo_assign_namespace(line);
+				memmove(line, cp, strlen(cp) + 1);
+			} else
+				head->w.ns = NULL;
+		} else
+			head->w.ns = &tomoyo_kernel_namespace;
+		/* Don't allow updating if namespace is invalid. */
+		if (!head->w.ns)
+			return -ENOENT;
+	}
+	/* Do the update. */
+	return head->write(head);
+}
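
For illustration (namespace name and entry invented), two lines as seen by
tomoyo_parse_policy() when writing exception policy:

	<kernel> aggregator /bin/busybox-cat /bin/cat
	delete <kernel> aggregator /bin/busybox-cat /bin/cat

The second has "delete " stripped first; both then select the "<kernel>"
namespace via tomoyo_assign_namespace() before head->write() runs. A line not
starting with '<' falls back to &tomoyo_kernel_namespace.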
+
+/**
  * tomoyo_write_control - write() for /sys/kernel/security/tomoyo/ interface.
  *
- * @file:       Pointer to "struct file".
+ * @head:       Pointer to "struct tomoyo_io_buffer".
  * @buffer:     Pointer to buffer to read from.
  * @buffer_len: Size of @buffer.
  *
  * Returns @buffer_len on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_control(struct file *file, const char __user *buffer,
-			 const int buffer_len)
+ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+			     const char __user *buffer, const int buffer_len)
 {
-	struct tomoyo_io_buffer *head = file->private_data;
 	int error = buffer_len;
-	int avail_len = buffer_len;
+	size_t avail_len = buffer_len;
 	char *cp0 = head->write_buf;
-
+	int idx;
 	if (!head->write)
 		return -ENOSYS;
 	if (!access_ok(VERIFY_READ, buffer, buffer_len))
 		return -EFAULT;
-	/* Don't allow updating policies by non manager programs. */
-	if (head->write != tomoyo_write_pid &&
-	    head->write != tomoyo_write_domain && !tomoyo_manager())
-		return -EPERM;
 	if (mutex_lock_interruptible(&head->io_sem))
 		return -EINTR;
+	idx = tomoyo_read_lock();
 	/* Read a line and dispatch it to the policy handler. */
 	while (avail_len > 0) {
 		char c;
-		if (head->write_avail >= head->writebuf_size - 1) {
-			error = -ENOMEM;
-			break;
-		} else if (get_user(c, buffer)) {
+		if (head->w.avail >= head->writebuf_size - 1) {
+			const int len = head->writebuf_size * 2;
+			char *cp = kzalloc(len, GFP_NOFS);
+			if (!cp) {
+				error = -ENOMEM;
+				break;
+			}
+			memmove(cp, cp0, head->w.avail);
+			kfree(cp0);
+			head->write_buf = cp;
+			cp0 = cp;
+			head->writebuf_size = len;
+		}
+		if (get_user(c, buffer)) {
 			error = -EFAULT;
 			break;
 		}
 		buffer++;
 		avail_len--;
-		cp0[head->write_avail++] = c;
+		cp0[head->w.avail++] = c;
 		if (c != '\n')
 			continue;
-		cp0[head->write_avail - 1] = '\0';
-		head->write_avail = 0;
+		cp0[head->w.avail - 1] = '\0';
+		head->w.avail = 0;
 		tomoyo_normalize_line(cp0);
-		head->write(head);
+		if (!strcmp(cp0, "reset")) {
+			head->w.ns = &tomoyo_kernel_namespace;
+			head->w.domain = NULL;
+			memset(&head->r, 0, sizeof(head->r));
+			continue;
+		}
+		/* Don't allow updating policies by non manager programs. */
+		switch (head->type) {
+		case TOMOYO_PROCESS_STATUS:
+			/* This does not write anything. */
+			break;
+		case TOMOYO_DOMAINPOLICY:
+			if (tomoyo_select_domain(head, cp0))
+				continue;
+			/* fall through */
+		case TOMOYO_EXCEPTIONPOLICY:
+			if (!strcmp(cp0, "select transition_only")) {
+				head->r.print_transition_related_only = true;
+				continue;
+			}
+			/* fall through */
+		default:
+			if (!tomoyo_manager()) {
+				error = -EPERM;
+				goto out;
+			}
+		}
+		switch (tomoyo_parse_policy(head, cp0)) {
+		case -EPERM:
+			error = -EPERM;
+			goto out;
+		case 0:
+			switch (head->type) {
+			case TOMOYO_DOMAINPOLICY:
+			case TOMOYO_EXCEPTIONPOLICY:
+			case TOMOYO_STAT:
+			case TOMOYO_PROFILE:
+			case TOMOYO_MANAGER:
+				tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
+				break;
+			default:
+				break;
+			}
+			break;
+		}
 	}
+out:
+	tomoyo_read_unlock(idx);
 	mutex_unlock(&head->io_sem);
 	return error;
 }
@@ -2015,35 +2534,20 @@
 /**
  * tomoyo_close_control - close() for /sys/kernel/security/tomoyo/ interface.
  *
- * @file: Pointer to "struct file".
+ * @head: Pointer to "struct tomoyo_io_buffer".
  *
- * Releases memory and returns 0.
- *
- * Caller looses tomoyo_read_lock().
+ * Returns 0.
  */
-int tomoyo_close_control(struct file *file)
+int tomoyo_close_control(struct tomoyo_io_buffer *head)
 {
-	struct tomoyo_io_buffer *head = file->private_data;
-	const bool is_write = !!head->write_buf;
-
 	/*
 	 * If the file is /sys/kernel/security/tomoyo/query , decrement the
 	 * observer counter.
 	 */
-	if (head->type == TOMOYO_QUERY)
-		atomic_dec(&tomoyo_query_observers);
-	else
-		tomoyo_read_unlock(head->reader_idx);
-	/* Release memory used for policy I/O. */
-	kfree(head->read_buf);
-	head->read_buf = NULL;
-	kfree(head->write_buf);
-	head->write_buf = NULL;
-	kfree(head);
-	head = NULL;
-	file->private_data = NULL;
-	if (is_write)
-		tomoyo_run_gc();
+	if (head->type == TOMOYO_QUERY &&
+	    atomic_dec_and_test(&tomoyo_query_observers))
+		wake_up_all(&tomoyo_answer_wait);
+	tomoyo_notify_gc(head, false);
 	return 0;
 }
 
@@ -2055,27 +2559,90 @@
 	struct tomoyo_domain_info *domain;
 	const int idx = tomoyo_read_lock();
 	tomoyo_policy_loaded = true;
-	/* Check all profiles currently assigned to domains are defined. */
+	printk(KERN_INFO "TOMOYO: 2.4.0\n");
 	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
 		const u8 profile = domain->profile;
-		if (tomoyo_profile_ptr[profile])
+		const struct tomoyo_policy_namespace *ns = domain->ns;
+		if (ns->profile_version != 20100903)
+			printk(KERN_ERR
+			       "Profile version %u is not supported.\n",
+			       ns->profile_version);
+		else if (!ns->profile_ptr[profile])
+			printk(KERN_ERR
+			       "Profile %u (used by '%s') is not defined.\n",
+			       profile, domain->domainname->name);
+		else
 			continue;
-		printk(KERN_ERR "You need to define profile %u before using it.\n",
-		       profile);
-		printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ "
+		printk(KERN_ERR
+		       "Userland tools for TOMOYO 2.4 must be installed and "
+		       "policy must be initialized.\n");
+		printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.4/ "
 		       "for more information.\n");
-		panic("Profile %u (used by '%s') not defined.\n",
-		      profile, domain->domainname->name);
+		panic("STOP!");
 	}
 	tomoyo_read_unlock(idx);
-	if (tomoyo_profile_version != 20090903) {
-		printk(KERN_ERR "You need to install userland programs for "
-		       "TOMOYO 2.3 and initialize policy configuration.\n");
-		printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ "
-		       "for more information.\n");
-		panic("Profile version %u is not supported.\n",
-		      tomoyo_profile_version);
-	}
-	printk(KERN_INFO "TOMOYO: 2.3.0\n");
 	printk(KERN_INFO "Mandatory Access Control activated.\n");
 }
+
+/**
+ * tomoyo_load_builtin_policy - Load built-in policy.
+ *
+ * Returns nothing.
+ */
+void __init tomoyo_load_builtin_policy(void)
+{
+	/*
+	 * This include file is manually created and contains built-in policy
+	 * named "tomoyo_builtin_profile", "tomoyo_builtin_exception_policy",
+	 * "tomoyo_builtin_domain_policy", "tomoyo_builtin_manager",
+	 * "tomoyo_builtin_stat" in the form of "static char [] __initdata".
+	 */
+#include "builtin-policy.h"
+	u8 i;
+	const int idx = tomoyo_read_lock();
+	for (i = 0; i < 5; i++) {
+		struct tomoyo_io_buffer head = { };
+		char *start = "";
+		switch (i) {
+		case 0:
+			start = tomoyo_builtin_profile;
+			head.type = TOMOYO_PROFILE;
+			head.write = tomoyo_write_profile;
+			break;
+		case 1:
+			start = tomoyo_builtin_exception_policy;
+			head.type = TOMOYO_EXCEPTIONPOLICY;
+			head.write = tomoyo_write_exception;
+			break;
+		case 2:
+			start = tomoyo_builtin_domain_policy;
+			head.type = TOMOYO_DOMAINPOLICY;
+			head.write = tomoyo_write_domain;
+			break;
+		case 3:
+			start = tomoyo_builtin_manager;
+			head.type = TOMOYO_MANAGER;
+			head.write = tomoyo_write_manager;
+			break;
+		case 4:
+			start = tomoyo_builtin_stat;
+			head.type = TOMOYO_STAT;
+			head.write = tomoyo_write_stat;
+			break;
+		}
+		while (1) {
+			char *end = strchr(start, '\n');
+			if (!end)
+				break;
+			*end = '\0';
+			tomoyo_normalize_line(start);
+			head.write_buf = start;
+			tomoyo_parse_policy(&head, start);
+			start = end + 1;
+		}
+	}
+	tomoyo_read_unlock(idx);
+#ifdef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+	tomoyo_check_profile();
+#endif
+}
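
A minimal sketch of what a hand-written builtin-policy.h might contain. Only
the variable names and the "static char [] __initdata" form are prescribed by
the comment above, and PROFILE_VERSION matches the value checked in
tomoyo_check_profile(); everything else is invented:

	static char tomoyo_builtin_profile[] __initdata =
		"PROFILE_VERSION=20100903\n"
		"0-COMMENT=-----Disabled Mode-----\n";
	static char tomoyo_builtin_exception_policy[] __initdata =
		"initialize_domain /sbin/init from any\n";
	static char tomoyo_builtin_domain_policy[] __initdata = "";
	static char tomoyo_builtin_manager[] __initdata = "";
	static char tomoyo_builtin_stat[] __initdata = "";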
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 7c66bd8..f7fbaa6 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -21,7 +21,8 @@
 #include <linux/list.h>
 #include <linux/cred.h>
 #include <linux/poll.h>
-struct linux_binprm;
+#include <linux/binfmts.h>
+#include <linux/highmem.h>
 
 /********** Constants definitions. **********/
 
@@ -38,66 +39,149 @@
 /* Profile number is an integer between 0 and 255. */
 #define TOMOYO_MAX_PROFILES 256
 
+/* Group number is an integer between 0 and 255. */
+#define TOMOYO_MAX_ACL_GROUPS 256
+
+/* Index numbers for "struct tomoyo_condition". */
+enum tomoyo_conditions_index {
+	TOMOYO_TASK_UID,             /* current_uid()   */
+	TOMOYO_TASK_EUID,            /* current_euid()  */
+	TOMOYO_TASK_SUID,            /* current_suid()  */
+	TOMOYO_TASK_FSUID,           /* current_fsuid() */
+	TOMOYO_TASK_GID,             /* current_gid()   */
+	TOMOYO_TASK_EGID,            /* current_egid()  */
+	TOMOYO_TASK_SGID,            /* current_sgid()  */
+	TOMOYO_TASK_FSGID,           /* current_fsgid() */
+	TOMOYO_TASK_PID,             /* sys_getpid()   */
+	TOMOYO_TASK_PPID,            /* sys_getppid()  */
+	TOMOYO_EXEC_ARGC,            /* "struct linux_binprm *"->argc */
+	TOMOYO_EXEC_ENVC,            /* "struct linux_binprm *"->envc */
+	TOMOYO_TYPE_IS_SOCKET,       /* S_IFSOCK */
+	TOMOYO_TYPE_IS_SYMLINK,      /* S_IFLNK */
+	TOMOYO_TYPE_IS_FILE,         /* S_IFREG */
+	TOMOYO_TYPE_IS_BLOCK_DEV,    /* S_IFBLK */
+	TOMOYO_TYPE_IS_DIRECTORY,    /* S_IFDIR */
+	TOMOYO_TYPE_IS_CHAR_DEV,     /* S_IFCHR */
+	TOMOYO_TYPE_IS_FIFO,         /* S_IFIFO */
+	TOMOYO_MODE_SETUID,          /* S_ISUID */
+	TOMOYO_MODE_SETGID,          /* S_ISGID */
+	TOMOYO_MODE_STICKY,          /* S_ISVTX */
+	TOMOYO_MODE_OWNER_READ,      /* S_IRUSR */
+	TOMOYO_MODE_OWNER_WRITE,     /* S_IWUSR */
+	TOMOYO_MODE_OWNER_EXECUTE,   /* S_IXUSR */
+	TOMOYO_MODE_GROUP_READ,      /* S_IRGRP */
+	TOMOYO_MODE_GROUP_WRITE,     /* S_IWGRP */
+	TOMOYO_MODE_GROUP_EXECUTE,   /* S_IXGRP */
+	TOMOYO_MODE_OTHERS_READ,     /* S_IROTH */
+	TOMOYO_MODE_OTHERS_WRITE,    /* S_IWOTH */
+	TOMOYO_MODE_OTHERS_EXECUTE,  /* S_IXOTH */
+	TOMOYO_EXEC_REALPATH,
+	TOMOYO_SYMLINK_TARGET,
+	TOMOYO_PATH1_UID,
+	TOMOYO_PATH1_GID,
+	TOMOYO_PATH1_INO,
+	TOMOYO_PATH1_MAJOR,
+	TOMOYO_PATH1_MINOR,
+	TOMOYO_PATH1_PERM,
+	TOMOYO_PATH1_TYPE,
+	TOMOYO_PATH1_DEV_MAJOR,
+	TOMOYO_PATH1_DEV_MINOR,
+	TOMOYO_PATH2_UID,
+	TOMOYO_PATH2_GID,
+	TOMOYO_PATH2_INO,
+	TOMOYO_PATH2_MAJOR,
+	TOMOYO_PATH2_MINOR,
+	TOMOYO_PATH2_PERM,
+	TOMOYO_PATH2_TYPE,
+	TOMOYO_PATH2_DEV_MAJOR,
+	TOMOYO_PATH2_DEV_MINOR,
+	TOMOYO_PATH1_PARENT_UID,
+	TOMOYO_PATH1_PARENT_GID,
+	TOMOYO_PATH1_PARENT_INO,
+	TOMOYO_PATH1_PARENT_PERM,
+	TOMOYO_PATH2_PARENT_UID,
+	TOMOYO_PATH2_PARENT_GID,
+	TOMOYO_PATH2_PARENT_INO,
+	TOMOYO_PATH2_PARENT_PERM,
+	TOMOYO_MAX_CONDITION_KEYWORD,
+	TOMOYO_NUMBER_UNION,
+	TOMOYO_NAME_UNION,
+	TOMOYO_ARGV_ENTRY,
+	TOMOYO_ENVP_ENTRY,
+};
+
+/* Index numbers for stat(). */
+enum tomoyo_path_stat_index {
+	/* Do not change this order. */
+	TOMOYO_PATH1,
+	TOMOYO_PATH1_PARENT,
+	TOMOYO_PATH2,
+	TOMOYO_PATH2_PARENT,
+	TOMOYO_MAX_PATH_STAT
+};
+
+/* Index numbers for operation mode. */
 enum tomoyo_mode_index {
 	TOMOYO_CONFIG_DISABLED,
 	TOMOYO_CONFIG_LEARNING,
 	TOMOYO_CONFIG_PERMISSIVE,
 	TOMOYO_CONFIG_ENFORCING,
-	TOMOYO_CONFIG_USE_DEFAULT = 255
+	TOMOYO_CONFIG_MAX_MODE,
+	TOMOYO_CONFIG_WANT_REJECT_LOG =  64,
+	TOMOYO_CONFIG_WANT_GRANT_LOG  = 128,
+	TOMOYO_CONFIG_USE_DEFAULT     = 255,
 };
 
+/* Index numbers for entry type. */
 enum tomoyo_policy_id {
 	TOMOYO_ID_GROUP,
 	TOMOYO_ID_PATH_GROUP,
 	TOMOYO_ID_NUMBER_GROUP,
 	TOMOYO_ID_TRANSITION_CONTROL,
 	TOMOYO_ID_AGGREGATOR,
-	TOMOYO_ID_GLOBALLY_READABLE,
-	TOMOYO_ID_PATTERN,
-	TOMOYO_ID_NO_REWRITE,
 	TOMOYO_ID_MANAGER,
+	TOMOYO_ID_CONDITION,
 	TOMOYO_ID_NAME,
 	TOMOYO_ID_ACL,
 	TOMOYO_ID_DOMAIN,
 	TOMOYO_MAX_POLICY
 };
 
+/* Index numbers for domain's attributes. */
+enum tomoyo_domain_info_flags_index {
+	/* Quota warning flag. */
+	TOMOYO_DIF_QUOTA_WARNED,
+	/*
+	 * This domain was unable to create a new domain at
+	 * tomoyo_find_next_domain() because the name of the domain to be
+	 * created was too long or it could not allocate memory.
+	 * More than one process continued execve() without domain transition.
+	 */
+	TOMOYO_DIF_TRANSITION_FAILED,
+	TOMOYO_MAX_DOMAIN_INFO_FLAGS
+};
+
+/* Index numbers for group entries. */
 enum tomoyo_group_id {
 	TOMOYO_PATH_GROUP,
 	TOMOYO_NUMBER_GROUP,
 	TOMOYO_MAX_GROUP
 };
 
-/* Keywords for ACLs. */
-#define TOMOYO_KEYWORD_AGGREGATOR                "aggregator "
-#define TOMOYO_KEYWORD_ALLOW_MOUNT               "allow_mount "
-#define TOMOYO_KEYWORD_ALLOW_READ                "allow_read "
-#define TOMOYO_KEYWORD_DELETE                    "delete "
-#define TOMOYO_KEYWORD_DENY_REWRITE              "deny_rewrite "
-#define TOMOYO_KEYWORD_FILE_PATTERN              "file_pattern "
-#define TOMOYO_KEYWORD_INITIALIZE_DOMAIN         "initialize_domain "
-#define TOMOYO_KEYWORD_KEEP_DOMAIN               "keep_domain "
-#define TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN      "no_initialize_domain "
-#define TOMOYO_KEYWORD_NO_KEEP_DOMAIN            "no_keep_domain "
-#define TOMOYO_KEYWORD_PATH_GROUP                "path_group "
-#define TOMOYO_KEYWORD_NUMBER_GROUP              "number_group "
-#define TOMOYO_KEYWORD_SELECT                    "select "
-#define TOMOYO_KEYWORD_USE_PROFILE               "use_profile "
-#define TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ  "ignore_global_allow_read"
-#define TOMOYO_KEYWORD_QUOTA_EXCEEDED            "quota_exceeded"
-#define TOMOYO_KEYWORD_TRANSITION_FAILED         "transition_failed"
-/* A domain definition starts with <kernel>. */
-#define TOMOYO_ROOT_NAME                         "<kernel>"
-#define TOMOYO_ROOT_NAME_LEN                     (sizeof(TOMOYO_ROOT_NAME) - 1)
+/* Index numbers for type of numeric values. */
+enum tomoyo_value_type {
+	TOMOYO_VALUE_TYPE_INVALID,
+	TOMOYO_VALUE_TYPE_DECIMAL,
+	TOMOYO_VALUE_TYPE_OCTAL,
+	TOMOYO_VALUE_TYPE_HEXADECIMAL,
+};
 
-/* Value type definition. */
-#define TOMOYO_VALUE_TYPE_INVALID     0
-#define TOMOYO_VALUE_TYPE_DECIMAL     1
-#define TOMOYO_VALUE_TYPE_OCTAL       2
-#define TOMOYO_VALUE_TYPE_HEXADECIMAL 3
-
+/* Index numbers for domain transition control keywords. */
 enum tomoyo_transition_type {
 	/* Do not change this order. */
+	TOMOYO_TRANSITION_CONTROL_NO_RESET,
+	TOMOYO_TRANSITION_CONTROL_RESET,
 	TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE,
 	TOMOYO_TRANSITION_CONTROL_INITIALIZE,
 	TOMOYO_TRANSITION_CONTROL_NO_KEEP,
@@ -114,35 +198,29 @@
 	TOMOYO_TYPE_MOUNT_ACL,
 };
 
-/* Index numbers for File Controls. */
-
-/*
- * TOMOYO_TYPE_READ_WRITE is special. TOMOYO_TYPE_READ_WRITE is automatically
- * set if both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are set.
- * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically set if
- * TOMOYO_TYPE_READ_WRITE is set.
- * TOMOYO_TYPE_READ_WRITE is automatically cleared if either TOMOYO_TYPE_READ
- * or TOMOYO_TYPE_WRITE is cleared.
- * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically cleared if
- * TOMOYO_TYPE_READ_WRITE is cleared.
- */
-
+/* Index numbers for access controls with one pathname. */
 enum tomoyo_path_acl_index {
-	TOMOYO_TYPE_READ_WRITE,
 	TOMOYO_TYPE_EXECUTE,
 	TOMOYO_TYPE_READ,
 	TOMOYO_TYPE_WRITE,
+	TOMOYO_TYPE_APPEND,
 	TOMOYO_TYPE_UNLINK,
+	TOMOYO_TYPE_GETATTR,
 	TOMOYO_TYPE_RMDIR,
 	TOMOYO_TYPE_TRUNCATE,
 	TOMOYO_TYPE_SYMLINK,
-	TOMOYO_TYPE_REWRITE,
 	TOMOYO_TYPE_CHROOT,
 	TOMOYO_TYPE_UMOUNT,
 	TOMOYO_MAX_PATH_OPERATION
 };
 
-#define TOMOYO_RW_MASK ((1 << TOMOYO_TYPE_READ) | (1 << TOMOYO_TYPE_WRITE))
+/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */
+enum tomoyo_memory_stat_type {
+	TOMOYO_MEMORY_POLICY,
+	TOMOYO_MEMORY_AUDIT,
+	TOMOYO_MEMORY_QUERY,
+	TOMOYO_MAX_MEMORY_STAT
+};
 
 enum tomoyo_mkdev_acl_index {
 	TOMOYO_TYPE_MKBLOCK,
@@ -150,6 +228,7 @@
 	TOMOYO_MAX_MKDEV_OPERATION
 };
 
+/* Index numbers for access controls with two pathnames. */
 enum tomoyo_path2_acl_index {
 	TOMOYO_TYPE_LINK,
 	TOMOYO_TYPE_RENAME,
@@ -157,6 +236,7 @@
 	TOMOYO_MAX_PATH2_OPERATION
 };
 
+/* Index numbers for access controls with one pathname and one number. */
 enum tomoyo_path_number_acl_index {
 	TOMOYO_TYPE_CREATE,
 	TOMOYO_TYPE_MKDIR,
@@ -169,31 +249,45 @@
 	TOMOYO_MAX_PATH_NUMBER_OPERATION
 };
 
+/* Index numbers for /sys/kernel/security/tomoyo/ interfaces. */
 enum tomoyo_securityfs_interface_index {
 	TOMOYO_DOMAINPOLICY,
 	TOMOYO_EXCEPTIONPOLICY,
-	TOMOYO_DOMAIN_STATUS,
 	TOMOYO_PROCESS_STATUS,
-	TOMOYO_MEMINFO,
+	TOMOYO_STAT,
 	TOMOYO_SELFDOMAIN,
+	TOMOYO_AUDIT,
 	TOMOYO_VERSION,
 	TOMOYO_PROFILE,
 	TOMOYO_QUERY,
 	TOMOYO_MANAGER
 };
 
+/* Index numbers for special mount operations. */
+enum tomoyo_special_mount {
+	TOMOYO_MOUNT_BIND,            /* mount --bind /source /dest   */
+	TOMOYO_MOUNT_MOVE,            /* mount --move /old /new       */
+	TOMOYO_MOUNT_REMOUNT,         /* mount -o remount /dir        */
+	TOMOYO_MOUNT_MAKE_UNBINDABLE, /* mount --make-unbindable /dir */
+	TOMOYO_MOUNT_MAKE_PRIVATE,    /* mount --make-private /dir    */
+	TOMOYO_MOUNT_MAKE_SLAVE,      /* mount --make-slave /dir      */
+	TOMOYO_MOUNT_MAKE_SHARED,     /* mount --make-shared /dir     */
+	TOMOYO_MAX_SPECIAL_MOUNT
+};
+
+/* Index numbers for functionality. */
 enum tomoyo_mac_index {
 	TOMOYO_MAC_FILE_EXECUTE,
 	TOMOYO_MAC_FILE_OPEN,
 	TOMOYO_MAC_FILE_CREATE,
 	TOMOYO_MAC_FILE_UNLINK,
+	TOMOYO_MAC_FILE_GETATTR,
 	TOMOYO_MAC_FILE_MKDIR,
 	TOMOYO_MAC_FILE_RMDIR,
 	TOMOYO_MAC_FILE_MKFIFO,
 	TOMOYO_MAC_FILE_MKSOCK,
 	TOMOYO_MAC_FILE_TRUNCATE,
 	TOMOYO_MAC_FILE_SYMLINK,
-	TOMOYO_MAC_FILE_REWRITE,
 	TOMOYO_MAC_FILE_MKBLOCK,
 	TOMOYO_MAC_FILE_MKCHAR,
 	TOMOYO_MAC_FILE_LINK,
@@ -209,38 +303,66 @@
 	TOMOYO_MAX_MAC_INDEX
 };
 
+/* Index numbers for category of functionality. */
 enum tomoyo_mac_category_index {
 	TOMOYO_MAC_CATEGORY_FILE,
 	TOMOYO_MAX_MAC_CATEGORY_INDEX
 };
 
-#define TOMOYO_RETRY_REQUEST 1 /* Retry this request. */
+/*
+ * Retry this request. Returned by tomoyo_supervisor() if a policy violation
+ * occurred in enforcing mode and the userspace daemon decided to retry.
+ *
+ * The value must be positive so that "retry" is distinguishable from both
+ * "granted" (which is 0) and "rejected" (which is a negative value).
+ */
+#define TOMOYO_RETRY_REQUEST 1
+
+/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */
+enum tomoyo_policy_stat_type {
+	/* Do not change this order. */
+	TOMOYO_STAT_POLICY_UPDATES,
+	TOMOYO_STAT_POLICY_LEARNING,   /* == TOMOYO_CONFIG_LEARNING */
+	TOMOYO_STAT_POLICY_PERMISSIVE, /* == TOMOYO_CONFIG_PERMISSIVE */
+	TOMOYO_STAT_POLICY_ENFORCING,  /* == TOMOYO_CONFIG_ENFORCING */
+	TOMOYO_MAX_POLICY_STAT
+};
+
+/* Index numbers for profile's PREFERENCE values. */
+enum tomoyo_pref_index {
+	TOMOYO_PREF_MAX_AUDIT_LOG,
+	TOMOYO_PREF_MAX_LEARNING_ENTRY,
+	TOMOYO_MAX_PREF
+};
 
 /********** Structure definitions. **********/
 
-/*
- * tomoyo_acl_head is a structure which is used for holding elements not in
- * domain policy.
- * It has following fields.
- *
- *  (1) "list" which is linked to tomoyo_policy_list[] .
- *  (2) "is_deleted" is a bool which is true if marked as deleted, false
- *      otherwise.
- */
+/* Common header for holding ACL entries. */
 struct tomoyo_acl_head {
 	struct list_head list;
 	bool is_deleted;
 } __packed;
 
-/*
- * tomoyo_request_info is a structure which is used for holding
- *
- * (1) Domain information of current process.
- * (2) How many retries are made for this request.
- * (3) Profile number used for this request.
- * (4) Access control mode of the profile.
- */
+/* Common header for shared entries. */
+struct tomoyo_shared_acl_head {
+	struct list_head list;
+	atomic_t users;
+} __packed;
+
+struct tomoyo_policy_namespace;
+
+/* Structure for request info. */
 struct tomoyo_request_info {
+	/*
+	 * For holding parameters specific to operations which deal with
+	 * files. NULL if not dealing with files.
+	 */
+	struct tomoyo_obj_info *obj;
+	/*
+	 * For holding parameters specific to an execve() request.
+	 * NULL if not handling do_execve().
+	 */
+	struct tomoyo_execve *ee;
 	struct tomoyo_domain_info *domain;
 	/* For holding parameters. */
 	union {
@@ -248,11 +370,13 @@
 			const struct tomoyo_path_info *filename;
 			/* For using wildcards at tomoyo_find_next_domain(). */
 			const struct tomoyo_path_info *matched_path;
+			/* One of values in "enum tomoyo_path_acl_index". */
 			u8 operation;
 		} path;
 		struct {
 			const struct tomoyo_path_info *filename1;
 			const struct tomoyo_path_info *filename2;
+			/* One of values in "enum tomoyo_path2_acl_index". */
 			u8 operation;
 		} path2;
 		struct {
@@ -260,11 +384,16 @@
 			unsigned int mode;
 			unsigned int major;
 			unsigned int minor;
+			/* One of values in "enum tomoyo_mkdev_acl_index". */
 			u8 operation;
 		} mkdev;
 		struct {
 			const struct tomoyo_path_info *filename;
 			unsigned long number;
+			/*
+			 * One of values in
+			 * "enum tomoyo_path_number_acl_index".
+			 */
 			u8 operation;
 		} path_number;
 		struct {
@@ -283,26 +412,7 @@
 	u8 type;
 };
 
-/*
- * tomoyo_path_info is a structure which is used for holding a string data
- * used by TOMOYO.
- * This structure has several fields for supporting pattern matching.
- *
- * (1) "name" is the '\0' terminated string data.
- * (2) "hash" is full_name_hash(name, strlen(name)).
- *     This allows tomoyo_pathcmp() to compare by hash before actually compare
- *     using strcmp().
- * (3) "const_len" is the length of the initial segment of "name" which
- *     consists entirely of non wildcard characters. In other words, the length
- *     which we can compare two strings using strncmp().
- * (4) "is_dir" is a bool which is true if "name" ends with "/",
- *     false otherwise.
- *     TOMOYO distinguishes directory and non-directory. A directory ends with
- *     "/" and non-directory does not end with "/".
- * (5) "is_patterned" is a bool which is true if "name" contains wildcard
- *     characters, false otherwise. This allows TOMOYO to use "hash" and
- *     strcmp() for string comparison if "is_patterned" is false.
- */
+/* Structure for holding a token. */
 struct tomoyo_path_info {
 	const char *name;
 	u32 hash;          /* = full_name_hash(name, strlen(name)) */
@@ -311,36 +421,32 @@
 	bool is_patterned; /* = tomoyo_path_contains_pattern(name) */
 };
 
-/*
- * tomoyo_name is a structure which is used for linking
- * "struct tomoyo_path_info" into tomoyo_name_list .
- */
+/* Structure for holding string data. */
 struct tomoyo_name {
-	struct list_head list;
-	atomic_t users;
+	struct tomoyo_shared_acl_head head;
 	struct tomoyo_path_info entry;
 };
 
+/* Structure for holding a word. */
 struct tomoyo_name_union {
+	/* Either @filename or @group is NULL. */
 	const struct tomoyo_path_info *filename;
 	struct tomoyo_group *group;
-	u8 is_group;
 };
 
+/* Structure for holding a number. */
 struct tomoyo_number_union {
 	unsigned long values[2];
-	struct tomoyo_group *group;
-	u8 min_type;
-	u8 max_type;
-	u8 is_group;
+	struct tomoyo_group *group; /* Maybe NULL. */
+	/* One of values in "enum tomoyo_value_type". */
+	u8 value_type[2];
 };
 
 /* Structure for "path_group"/"number_group" directive. */
 struct tomoyo_group {
-	struct list_head list;
+	struct tomoyo_shared_acl_head head;
 	const struct tomoyo_path_info *group_name;
 	struct list_head member_list;
-	atomic_t users;
 };
 
 /* Structure for "path_group" directive. */
@@ -355,130 +461,158 @@
 	struct tomoyo_number_union number;
 };
 
-/*
- * tomoyo_acl_info is a structure which is used for holding
- *
- *  (1) "list" which is linked to the ->acl_info_list of
- *      "struct tomoyo_domain_info"
- *  (2) "is_deleted" is a bool which is true if this domain is marked as
- *      "deleted", false otherwise.
- *  (3) "type" which tells type of the entry.
- *
- * Packing "struct tomoyo_acl_info" allows
- * "struct tomoyo_path_acl" to embed "u16" and "struct tomoyo_path2_acl"
- * "struct tomoyo_path_number_acl" "struct tomoyo_mkdev_acl" to embed
- * "u8" without enlarging their structure size.
- */
+/* Subset of "struct stat". Used by conditional ACL and audit logs. */
+struct tomoyo_mini_stat {
+	uid_t uid;
+	gid_t gid;
+	ino_t ino;
+	mode_t mode;
+	dev_t dev;
+	dev_t rdev;
+};
+
+/* Structure for dumping argv[] and envp[] of "struct linux_binprm". */
+struct tomoyo_page_dump {
+	struct page *page;    /* Previously dumped page. */
+	char *data;           /* Contents of "page". Size is PAGE_SIZE. */
+};
+
+/* Structure for attribute checks in addition to pathname checks. */
+struct tomoyo_obj_info {
+	/*
+	 * True if tomoyo_get_attributes() was already called, false otherwise.
+	 */
+	bool validate_done;
+	/* True if @stat[] is valid. */
+	bool stat_valid[TOMOYO_MAX_PATH_STAT];
+	/* First pathname. Initialized with { NULL, NULL } if no path. */
+	struct path path1;
+	/* Second pathname. Initialized with { NULL, NULL } if no path. */
+	struct path path2;
+	/*
+	 * Information on @path1, @path1's parent directory, @path2, @path2's
+	 * parent directory.
+	 */
+	struct tomoyo_mini_stat stat[TOMOYO_MAX_PATH_STAT];
+	/*
+	 * Content of symbolic link to be created. NULL for operations other
+	 * than symlink().
+	 */
+	struct tomoyo_path_info *symlink_target;
+};
+
+/* Structure for argv[]. */
+struct tomoyo_argv {
+	unsigned long index;
+	const struct tomoyo_path_info *value;
+	bool is_not;
+};
+
+/* Structure for envp[]. */
+struct tomoyo_envp {
+	const struct tomoyo_path_info *name;
+	const struct tomoyo_path_info *value;
+	bool is_not;
+};
+
+/* Structure for execve() operation. */
+struct tomoyo_execve {
+	struct tomoyo_request_info r;
+	struct tomoyo_obj_info obj;
+	struct linux_binprm *bprm;
+	/* For dumping argv[] and envp[]. */
+	struct tomoyo_page_dump dump;
+	/* For temporary use. */
+	char *tmp; /* Size is TOMOYO_EXEC_TMPSIZE bytes. */
+};
+
+/* Structure for entries which follows "struct tomoyo_condition". */
+struct tomoyo_condition_element {
+	/*
+	 * Left-hand operand. A "struct tomoyo_argv" for TOMOYO_ARGV_ENTRY or
+	 * a "struct tomoyo_envp" for TOMOYO_ENVP_ENTRY is attached to the
+	 * tail of the array of this struct.
+	 */
+	u8 left;
+	/*
+	 * Right-hand operand. A "struct tomoyo_number_union" for
+	 * TOMOYO_NUMBER_UNION or a "struct tomoyo_name_union" for
+	 * TOMOYO_NAME_UNION is attached to the tail of the array of this
+	 * struct.
+	 */
+	u8 right;
+	/*
+	 * Comparison operator. True for '=' (equals or overlaps), false
+	 * for '!='.
+	 */
+	bool equals;
+};
+
+/* Structure for optional arguments. */
+struct tomoyo_condition {
+	struct tomoyo_shared_acl_head head;
+	u32 size; /* Memory size allocated for this entry. */
+	u16 condc; /* Number of conditions in this struct. */
+	u16 numbers_count; /* Number of "struct tomoyo_number_union" values. */
+	u16 names_count; /* Number of "struct tomoyo_name_union" names. */
+	u16 argc; /* Number of "struct tomoyo_argv". */
+	u16 envc; /* Number of "struct tomoyo_envp". */
+	/*
+	 * struct tomoyo_condition_element condition[condc];
+	 * struct tomoyo_number_union values[numbers_count];
+	 * struct tomoyo_name_union names[names_count];
+	 * struct tomoyo_argv argv[argc];
+	 * struct tomoyo_envp envp[envc];
+	 */
+};
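+
+/*
+ * Illustrative sketch (not part of this patch): the variable-sized tail
+ * of "struct tomoyo_condition" is reached by pointer arithmetic in the
+ * order listed above, e.g.
+ *
+ *   const struct tomoyo_condition_element *condp =
+ *           (const struct tomoyo_condition_element *) (cond + 1);
+ *   const struct tomoyo_number_union *numbers_p =
+ *           (const struct tomoyo_number_union *) (condp + cond->condc);
+ *
+ * tomoyo_get_condition() and tomoyo_condition() in
+ * security/tomoyo/condition.c open-code exactly this layout.
+ */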
+
+/* Common header for individual entries. */
 struct tomoyo_acl_info {
 	struct list_head list;
+	struct tomoyo_condition *cond; /* Maybe NULL. */
 	bool is_deleted;
-	u8 type; /* = one of values in "enum tomoyo_acl_entry_type_index". */
+	u8 type; /* One of values in "enum tomoyo_acl_entry_type_index". */
 } __packed;
 
-/*
- * tomoyo_domain_info is a structure which is used for holding permissions
- * (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
- * It has following fields.
- *
- *  (1) "list" which is linked to tomoyo_domain_list .
- *  (2) "acl_info_list" which is linked to "struct tomoyo_acl_info".
- *  (3) "domainname" which holds the name of the domain.
- *  (4) "profile" which remembers profile number assigned to this domain.
- *  (5) "is_deleted" is a bool which is true if this domain is marked as
- *      "deleted", false otherwise.
- *  (6) "quota_warned" is a bool which is used for suppressing warning message
- *      when learning mode learned too much entries.
- *  (7) "ignore_global_allow_read" is a bool which is true if this domain
- *      should ignore "allow_read" directive in exception policy.
- *  (8) "transition_failed" is a bool which is set to true when this domain was
- *      unable to create a new domain at tomoyo_find_next_domain() because the
- *      name of the domain to be created was too long or it could not allocate
- *      memory. If set to true, more than one process continued execve()
- *      without domain transition.
- *  (9) "users" is an atomic_t that holds how many "struct cred"->security
- *      are referring this "struct tomoyo_domain_info". If is_deleted == true
- *      and users == 0, this struct will be kfree()d upon next garbage
- *      collection.
- *
- * A domain's lifecycle is an analogy of files on / directory.
- * Multiple domains with the same domainname cannot be created (as with
- * creating files with the same filename fails with -EEXIST).
- * If a process reached a domain, that process can reside in that domain after
- * that domain is marked as "deleted" (as with a process can access an already
- * open()ed file after that file was unlink()ed).
- */
+/* Structure for domain information. */
 struct tomoyo_domain_info {
 	struct list_head list;
 	struct list_head acl_info_list;
 	/* Name of this domain. Never NULL.          */
 	const struct tomoyo_path_info *domainname;
+	/* Namespace for this domain. Never NULL. */
+	struct tomoyo_policy_namespace *ns;
 	u8 profile;        /* Profile number to use. */
+	u8 group;          /* Group number to use.   */
 	bool is_deleted;   /* Delete flag.           */
-	bool quota_warned; /* Quota warnning flag.   */
-	bool ignore_global_allow_read; /* Ignore "allow_read" flag. */
-	bool transition_failed; /* Domain transition failed flag. */
+	bool flags[TOMOYO_MAX_DOMAIN_INFO_FLAGS];
 	atomic_t users; /* Number of referring credentials. */
 };
 
 /*
- * tomoyo_path_acl is a structure which is used for holding an
- * entry with one pathname operation (e.g. open(), mkdir()).
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "name" is the pathname.
- *
- * Directives held by this structure are "allow_read/write", "allow_execute",
- * "allow_read", "allow_write", "allow_unlink", "allow_rmdir",
- * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_chroot" and
- * "allow_unmount".
+ * Structure for "file execute", "file read", "file write", "file append",
+ * "file unlink", "file getattr", "file rmdir", "file truncate",
+ * "file symlink", "file chroot" and "file unmount" directive.
  */
 struct tomoyo_path_acl {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_ACL */
-	u16 perm;
+	u16 perm; /* Bitmask of values in "enum tomoyo_path_acl_index". */
 	struct tomoyo_name_union name;
 };
 
 /*
- * tomoyo_path_number_acl is a structure which is used for holding an
- * entry with one pathname and one number operation.
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "name" is the pathname.
- *  (4) "number" is the numeric value.
- *
- * Directives held by this structure are "allow_create", "allow_mkdir",
- * "allow_ioctl", "allow_mkfifo", "allow_mksock", "allow_chmod", "allow_chown"
- * and "allow_chgrp".
- *
+ * Structure for "file create", "file mkdir", "file mkfifo", "file mksock",
+ * "file ioctl", "file chmod", "file chown" and "file chgrp" directive.
  */
 struct tomoyo_path_number_acl {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_NUMBER_ACL */
+	/* Bitmask of values in "enum tomoyo_path_number_acl_index". */
 	u8 perm;
 	struct tomoyo_name_union name;
 	struct tomoyo_number_union number;
 };
 
-/*
- * tomoyo_mkdev_acl is a structure which is used for holding an
- * entry with one pathname and three numbers operation.
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "mode" is the create mode.
- *  (4) "major" is the major number of device node.
- *  (5) "minor" is the minor number of device node.
- *
- * Directives held by this structure are "allow_mkchar", "allow_mkblock".
- *
- */
+/* Structure for "file mkblock" and "file mkchar" directive. */
 struct tomoyo_mkdev_acl {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MKDEV_ACL */
-	u8 perm;
+	u8 perm; /* Bitmask of values in "enum tomoyo_mkdev_acl_index". */
 	struct tomoyo_name_union name;
 	struct tomoyo_number_union mode;
 	struct tomoyo_number_union major;
@@ -486,38 +620,16 @@
 };
 
 /*
- * tomoyo_path2_acl is a structure which is used for holding an
- * entry with two pathnames operation (i.e. link(), rename() and pivot_root()).
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "name1" is the source/old pathname.
- *  (4) "name2" is the destination/new pathname.
- *
- * Directives held by this structure are "allow_rename", "allow_link" and
- * "allow_pivot_root".
+ * Structure for "file rename", "file link" and "file pivot_root" directive.
  */
 struct tomoyo_path2_acl {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH2_ACL */
-	u8 perm;
+	u8 perm; /* Bitmask of values in "enum tomoyo_path2_acl_index". */
 	struct tomoyo_name_union name1;
 	struct tomoyo_name_union name2;
 };
 
-/*
- * tomoyo_mount_acl is a structure which is used for holding an
- * entry for mount operation.
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "dev_name" is the device name.
- *  (3) "dir_name" is the mount point.
- *  (4) "fs_type" is the filesystem type.
- *  (5) "flags" is the mount flags.
- *
- * Directive held by this structure is "allow_mount".
- */
+/* Structure for "file mount" directive. */
 struct tomoyo_mount_acl {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MOUNT_ACL */
 	struct tomoyo_name_union dev_name;
@@ -526,7 +638,15 @@
 	struct tomoyo_number_union flags;
 };
 
-#define TOMOYO_MAX_IO_READ_QUEUE 32
+/* Structure for holding a line from /sys/kernel/security/tomoyo/ interface. */
+struct tomoyo_acl_param {
+	char *data;
+	struct list_head *list;
+	struct tomoyo_policy_namespace *ns;
+	bool is_delete;
+};
+
+#define TOMOYO_MAX_IO_READ_QUEUE 64
 
 /*
  * Structure for reading/writing policy via /sys/kernel/security/tomoyo
@@ -538,95 +658,55 @@
 	int (*poll) (struct file *file, poll_table *wait);
 	/* Exclusive lock for this structure.   */
 	struct mutex io_sem;
-	/* Index returned by tomoyo_read_lock(). */
-	int reader_idx;
 	char __user *read_user_buf;
-	int read_user_buf_avail;
+	size_t read_user_buf_avail;
 	struct {
+		struct list_head *ns;
 		struct list_head *domain;
 		struct list_head *group;
 		struct list_head *acl;
-		int avail;
-		int step;
-		int query_index;
+		size_t avail;
+		unsigned int step;
+		unsigned int query_index;
 		u16 index;
+		u16 cond_index;
+		u8 acl_group_index;
+		u8 cond_step;
 		u8 bit;
 		u8 w_pos;
 		bool eof;
 		bool print_this_domain_only;
-		bool print_execute_only;
+		bool print_transition_related_only;
+		bool print_cond_part;
 		const char *w[TOMOYO_MAX_IO_READ_QUEUE];
 	} r;
-	/* The position currently writing to.   */
-	struct tomoyo_domain_info *write_var1;
+	struct {
+		struct tomoyo_policy_namespace *ns;
+		/* The position currently writing to.   */
+		struct tomoyo_domain_info *domain;
+		/* Bytes available for writing.         */
+		size_t avail;
+		bool is_delete;
+	} w;
 	/* Buffer for reading.                  */
 	char *read_buf;
 	/* Size of read buffer.                 */
-	int readbuf_size;
+	size_t readbuf_size;
 	/* Buffer for writing.                  */
 	char *write_buf;
-	/* Bytes available for writing.         */
-	int write_avail;
 	/* Size of write buffer.                */
-	int writebuf_size;
+	size_t writebuf_size;
 	/* Type of this interface.              */
-	u8 type;
+	enum tomoyo_securityfs_interface_index type;
+	/* Users counter protected by tomoyo_io_buffer_list_lock. */
+	u8 users;
+	/* List for telling GC not to kfree() elements. */
+	struct list_head list;
 };
 
 /*
- * tomoyo_readable_file is a structure which is used for holding
- * "allow_read" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "filename" is a pathname which is allowed to open(O_RDONLY).
- */
-struct tomoyo_readable_file {
-	struct tomoyo_acl_head head;
-	const struct tomoyo_path_info *filename;
-};
-
-/*
- * tomoyo_no_pattern is a structure which is used for holding
- * "file_pattern" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "pattern" is a pathname pattern which is used for converting pathnames
- *      to pathname patterns during learning mode.
- */
-struct tomoyo_no_pattern {
-	struct tomoyo_acl_head head;
-	const struct tomoyo_path_info *pattern;
-};
-
-/*
- * tomoyo_no_rewrite is a structure which is used for holding
- * "deny_rewrite" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "pattern" is a pathname which is by default not permitted to modify
- *      already existing content.
- */
-struct tomoyo_no_rewrite {
-	struct tomoyo_acl_head head;
-	const struct tomoyo_path_info *pattern;
-};
-
-/*
- * tomoyo_transition_control is a structure which is used for holding
- * "initialize_domain"/"no_initialize_domain"/"keep_domain"/"no_keep_domain"
- * entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "type" is type of this entry.
- *  (3) "is_last_name" is a bool which is true if "domainname" is "the last
- *      component of a domainname", false otherwise.
- *  (4) "domainname" which is "a domainname" or "the last component of a
- *      domainname".
- *  (5) "program" which is a program's pathname.
+ * Structure for "initialize_domain"/"no_initialize_domain"/"keep_domain"/
+ * "no_keep_domain" keyword.
  */
 struct tomoyo_transition_control {
 	struct tomoyo_acl_head head;
@@ -637,32 +717,14 @@
 	const struct tomoyo_path_info *program;    /* Maybe NULL */
 };
 
-/*
- * tomoyo_aggregator is a structure which is used for holding
- * "aggregator" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "original_name" which is originally requested name.
- *  (3) "aggregated_name" which is name to rewrite.
- */
+/* Structure for "aggregator" keyword. */
 struct tomoyo_aggregator {
 	struct tomoyo_acl_head head;
 	const struct tomoyo_path_info *original_name;
 	const struct tomoyo_path_info *aggregated_name;
 };
 
-/*
- * tomoyo_manager is a structure which is used for holding list of
- * domainnames or programs which are permitted to modify configuration via
- * /sys/kernel/security/tomoyo/ interface.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "is_domain" is a bool which is true if "manager" is a domainname, false
- *      otherwise.
- *  (3) "manager" is a domainname or a program's pathname.
- */
+/* Structure for policy manager. */
 struct tomoyo_manager {
 	struct tomoyo_acl_head head;
 	bool is_domain;  /* True if manager is a domainname. */
@@ -677,6 +739,7 @@
 	bool permissive_verbose;
 };
 
+/* Structure for /sys/kernel/security/tomoyo/profile interface. */
 struct tomoyo_profile {
 	const struct tomoyo_path_info *comment;
 	struct tomoyo_preference *learning;
@@ -685,246 +748,254 @@
 	struct tomoyo_preference preference;
 	u8 default_config;
 	u8 config[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX];
+	unsigned int pref[TOMOYO_MAX_PREF];
+};
+
+/* Structure for representing YYYY/MM/DD hh:mm:ss. */
+struct tomoyo_time {
+	u16 year;
+	u8 month;
+	u8 day;
+	u8 hour;
+	u8 min;
+	u8 sec;
+};
+
+/* Structure for policy namespace. */
+struct tomoyo_policy_namespace {
+	/* Profile table. Memory is allocated as needed. */
+	struct tomoyo_profile *profile_ptr[TOMOYO_MAX_PROFILES];
+	/* List of "struct tomoyo_group". */
+	struct list_head group_list[TOMOYO_MAX_GROUP];
+	/* List of policy. */
+	struct list_head policy_list[TOMOYO_MAX_POLICY];
+	/* The global ACL referred by "use_group" keyword. */
+	struct list_head acl_group[TOMOYO_MAX_ACL_GROUPS];
+	/* List for connecting to tomoyo_namespace_list list. */
+	struct list_head namespace_list;
+	/* Profile version. Currently only 20100903 is defined. */
+	unsigned int profile_version;
+	/* Name of this namespace (e.g. "<kernel>", "</usr/sbin/httpd>"). */
+	const char *name;
 };
 
 /********** Function prototypes. **********/
 
-/* Check whether the given string starts with the given keyword. */
-bool tomoyo_str_starts(char **src, const char *find);
-/* Get tomoyo_realpath() of current process. */
-const char *tomoyo_get_exe(void);
-/* Format string. */
-void tomoyo_normalize_line(unsigned char *buffer);
-/* Print warning or error message on console. */
-void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...)
-     __attribute__ ((format(printf, 2, 3)));
-/* Check all profiles currently assigned to domains are defined. */
-void tomoyo_check_profile(void);
-/* Open operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_open_control(const u8 type, struct file *file);
-/* Close /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_close_control(struct file *file);
-/* Poll operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_poll_control(struct file *file, poll_table *wait);
-/* Read operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_read_control(struct file *file, char __user *buffer,
-			const int buffer_len);
-/* Write operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_write_control(struct file *file, const char __user *buffer,
-			 const int buffer_len);
-/* Check whether the domain has too many ACL entries to hold. */
-bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r);
-/* Print out of memory warning message. */
-void tomoyo_warn_oom(const char *function);
-/* Check whether the given name matches the given name_union. */
-const struct tomoyo_path_info *
-tomoyo_compare_name_union(const struct tomoyo_path_info *name,
-			  const struct tomoyo_name_union *ptr);
-/* Check whether the given number matches the given number_union. */
 bool tomoyo_compare_number_union(const unsigned long value,
 				 const struct tomoyo_number_union *ptr);
-int tomoyo_get_mode(const u8 profile, const u8 index);
-void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
-	__attribute__ ((format(printf, 2, 3)));
-/* Check whether the domainname is correct. */
+bool tomoyo_condition(struct tomoyo_request_info *r,
+		      const struct tomoyo_condition *cond);
 bool tomoyo_correct_domain(const unsigned char *domainname);
-/* Check whether the token is correct. */
 bool tomoyo_correct_path(const char *filename);
 bool tomoyo_correct_word(const char *string);
-/* Check whether the token can be a domainname. */
 bool tomoyo_domain_def(const unsigned char *buffer);
-bool tomoyo_parse_name_union(const char *filename,
-			     struct tomoyo_name_union *ptr);
-/* Check whether the given filename matches the given path_group. */
-const struct tomoyo_path_info *
-tomoyo_path_matches_group(const struct tomoyo_path_info *pathname,
-			  const struct tomoyo_group *group);
-/* Check whether the given value matches the given number_group. */
+bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r);
+bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
+		      struct tomoyo_page_dump *dump);
+bool tomoyo_memory_ok(void *ptr);
 bool tomoyo_number_matches_group(const unsigned long min,
 				 const unsigned long max,
 				 const struct tomoyo_group *group);
-/* Check whether the given filename matches the given pattern. */
+bool tomoyo_parse_name_union(struct tomoyo_acl_param *param,
+			     struct tomoyo_name_union *ptr);
+bool tomoyo_parse_number_union(struct tomoyo_acl_param *param,
+			       struct tomoyo_number_union *ptr);
 bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename,
 				 const struct tomoyo_path_info *pattern);
-
-bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num);
-/* Tokenize a line. */
-bool tomoyo_tokenize(char *buffer, char *w[], size_t size);
-/* Write domain policy violation warning message to console? */
-bool tomoyo_verbose_mode(const struct tomoyo_domain_info *domain);
-/* Fill "struct tomoyo_request_info". */
+bool tomoyo_permstr(const char *string, const char *keyword);
+bool tomoyo_str_starts(char **src, const char *find);
+char *tomoyo_encode(const char *str);
+char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
+		      va_list args);
+char *tomoyo_read_token(struct tomoyo_acl_param *param);
+char *tomoyo_realpath_from_path(struct path *path);
+char *tomoyo_realpath_nofollow(const char *pathname);
+const char *tomoyo_get_exe(void);
+const char *tomoyo_yesno(const unsigned int value);
+const struct tomoyo_path_info *tomoyo_compare_name_union
+(const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr);
+const struct tomoyo_path_info *tomoyo_get_name(const char *name);
+const struct tomoyo_path_info *tomoyo_path_matches_group
+(const struct tomoyo_path_info *pathname, const struct tomoyo_group *group);
+int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
+				 struct path *path, const int flag);
+int tomoyo_close_control(struct tomoyo_io_buffer *head);
+int tomoyo_find_next_domain(struct linux_binprm *bprm);
+int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
+		    const u8 index);
 int tomoyo_init_request_info(struct tomoyo_request_info *r,
 			     struct tomoyo_domain_info *domain,
 			     const u8 index);
-/* Check permission for mount operation. */
-int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
-			    unsigned long flags, void *data_page);
-/* Create "aggregator" entry in exception policy. */
-int tomoyo_write_aggregator(char *data, const bool is_delete);
-int tomoyo_write_transition_control(char *data, const bool is_delete,
-				    const u8 type);
-/*
- * Create "allow_read/write", "allow_execute", "allow_read", "allow_write",
- * "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir",
- * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar",
- * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_rename" and
- * "allow_link" entry in domain policy.
- */
-int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain,
-		      const bool is_delete);
-/* Create "allow_read" entry in exception policy. */
-int tomoyo_write_globally_readable(char *data, const bool is_delete);
-/* Create "allow_mount" entry in domain policy. */
-int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain,
-		       const bool is_delete);
-/* Create "deny_rewrite" entry in exception policy. */
-int tomoyo_write_no_rewrite(char *data, const bool is_delete);
-/* Create "file_pattern" entry in exception policy. */
-int tomoyo_write_pattern(char *data, const bool is_delete);
-/* Create "path_group"/"number_group" entry in exception policy. */
-int tomoyo_write_group(char *data, const bool is_delete, const u8 type);
-int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
-     __attribute__ ((format(printf, 2, 3)));
-/* Find a domain by the given name. */
-struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
-/* Find or create a domain by the given name. */
-struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
-						const u8 profile);
-struct tomoyo_profile *tomoyo_profile(const u8 profile);
-/*
- * Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group".
- */
-struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 type);
-
-/* Check mode for specified functionality. */
-unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
-				const u8 index);
-/* Fill in "struct tomoyo_path_info" members. */
-void tomoyo_fill_path_info(struct tomoyo_path_info *ptr);
-/* Run policy loader when /sbin/init starts. */
-void tomoyo_load_policy(const char *filename);
-
-void tomoyo_put_number_union(struct tomoyo_number_union *ptr);
-
-/* Convert binary string to ascii string. */
-char *tomoyo_encode(const char *str);
-
-/*
- * Returns realpath(3) of the given pathname except that
- * ignores chroot'ed root and does not follow the final symlink.
- */
-char *tomoyo_realpath_nofollow(const char *pathname);
-/*
- * Returns realpath(3) of the given pathname except that
- * ignores chroot'ed root and the pathname is already solved.
- */
-char *tomoyo_realpath_from_path(struct path *path);
-/* Get patterned pathname. */
-const char *tomoyo_pattern(const struct tomoyo_path_info *filename);
-
-/* Check memory quota. */
-bool tomoyo_memory_ok(void *ptr);
-void *tomoyo_commit_ok(void *data, const unsigned int size);
-
-/*
- * Keep the given name on the RAM.
- * The RAM is shared, so NEVER try to modify or kfree() the returned name.
- */
-const struct tomoyo_path_info *tomoyo_get_name(const char *name);
-
-/* Check for memory usage. */
-void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head);
-
-/* Set memory quota. */
-int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head);
-
-/* Initialize mm related code. */
-void __init tomoyo_mm_init(void);
-int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
-			   const struct tomoyo_path_info *filename);
-int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
-				 struct path *path, const int flag);
-int tomoyo_path_number_perm(const u8 operation, struct path *path,
-			    unsigned long number);
 int tomoyo_mkdev_perm(const u8 operation, struct path *path,
 		      const unsigned int mode, unsigned int dev);
-int tomoyo_path_perm(const u8 operation, struct path *path);
+int tomoyo_mount_permission(char *dev_name, struct path *path,
+			    const char *type, unsigned long flags,
+			    void *data_page);
+int tomoyo_open_control(const u8 type, struct file *file);
 int tomoyo_path2_perm(const u8 operation, struct path *path1,
 		      struct path *path2);
-int tomoyo_find_next_domain(struct linux_binprm *bprm);
-
-void tomoyo_print_ulong(char *buffer, const int buffer_len,
-			const unsigned long value, const u8 type);
-
-/* Drop refcount on tomoyo_name_union. */
-void tomoyo_put_name_union(struct tomoyo_name_union *ptr);
-
-/* Run garbage collector. */
-void tomoyo_run_gc(void);
-
-void tomoyo_memory_free(void *ptr);
-
+int tomoyo_path_number_perm(const u8 operation, struct path *path,
+			    unsigned long number);
+int tomoyo_path_perm(const u8 operation, struct path *path,
+		     const char *target);
+int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
+			   const struct tomoyo_path_info *filename);
+int tomoyo_poll_control(struct file *file, poll_table *wait);
+int tomoyo_poll_log(struct file *file, poll_table *wait);
+int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
+	__printf(2, 3);
 int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
-			 bool is_delete, struct tomoyo_domain_info *domain,
-			 bool (*check_duplicate) (const struct tomoyo_acl_info
-						  *,
-						  const struct tomoyo_acl_info
-						  *),
-			 bool (*merge_duplicate) (struct tomoyo_acl_info *,
-						  struct tomoyo_acl_info *,
-						  const bool));
+			 struct tomoyo_acl_param *param,
+			 bool (*check_duplicate)
+			 (const struct tomoyo_acl_info *,
+			  const struct tomoyo_acl_info *),
+			 bool (*merge_duplicate)
+			 (struct tomoyo_acl_info *, struct tomoyo_acl_info *,
+			  const bool));
 int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
-			 bool is_delete, struct list_head *list,
-			 bool (*check_duplicate) (const struct tomoyo_acl_head
-						  *,
-						  const struct tomoyo_acl_head
-						  *));
+			 struct tomoyo_acl_param *param,
+			 bool (*check_duplicate)
+			 (const struct tomoyo_acl_head *,
+			  const struct tomoyo_acl_head *));
+int tomoyo_write_aggregator(struct tomoyo_acl_param *param);
+int tomoyo_write_file(struct tomoyo_acl_param *param);
+int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type);
+int tomoyo_write_transition_control(struct tomoyo_acl_param *param,
+				    const u8 type);
+ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer,
+			    const int buffer_len);
+ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+			     const char __user *buffer, const int buffer_len);
+struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param);
+struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
+						const bool transit);
+struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
+struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param,
+				      const u8 idx);
+struct tomoyo_policy_namespace *tomoyo_assign_namespace
+(const char *domainname);
+struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns,
+				      const u8 profile);
+unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
+				const u8 index);
+u8 tomoyo_parse_ulong(unsigned long *result, char **str);
+void *tomoyo_commit_ok(void *data, const unsigned int size);
+void __init tomoyo_load_builtin_policy(void);
+void __init tomoyo_mm_init(void);
 void tomoyo_check_acl(struct tomoyo_request_info *r,
 		      bool (*check_entry) (struct tomoyo_request_info *,
 					   const struct tomoyo_acl_info *));
+void tomoyo_check_profile(void);
+void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp);
+void tomoyo_del_condition(struct list_head *element);
+void tomoyo_fill_path_info(struct tomoyo_path_info *ptr);
+void tomoyo_get_attributes(struct tomoyo_obj_info *obj);
+void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns);
+void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
+	 __printf(2, 3);
+void tomoyo_load_policy(const char *filename);
+void tomoyo_memory_free(void *ptr);
+void tomoyo_normalize_line(unsigned char *buffer);
+void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register);
+void tomoyo_print_ulong(char *buffer, const int buffer_len,
+			const unsigned long value, const u8 type);
+void tomoyo_put_name_union(struct tomoyo_name_union *ptr);
+void tomoyo_put_number_union(struct tomoyo_number_union *ptr);
+void tomoyo_read_log(struct tomoyo_io_buffer *head);
+void tomoyo_update_stat(const u8 index);
+void tomoyo_warn_oom(const char *function);
+void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...)
+	__printf(2, 3);
+void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt,
+		       va_list args);
 
 /********** External variable definitions. **********/
 
-/* Lock for GC. */
-extern struct srcu_struct tomoyo_ss;
-
-/* The list for "struct tomoyo_domain_info". */
-extern struct list_head tomoyo_domain_list;
-
-extern struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY];
-extern struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP];
-extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
-
-/* Lock for protecting policy. */
-extern struct mutex tomoyo_policy_lock;
-
-/* Has /sbin/init started? */
 extern bool tomoyo_policy_loaded;
-
-/* The kernel's domain. */
+extern const char * const tomoyo_condition_keyword
+[TOMOYO_MAX_CONDITION_KEYWORD];
+extern const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS];
+extern const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+					      + TOMOYO_MAX_MAC_CATEGORY_INDEX];
+extern const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE];
+extern const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION];
+extern const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX];
+extern const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION];
+extern const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION];
+extern const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION];
+extern struct list_head tomoyo_condition_list;
+extern struct list_head tomoyo_domain_list;
+extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
+extern struct list_head tomoyo_namespace_list;
+extern struct mutex tomoyo_policy_lock;
+extern struct srcu_struct tomoyo_ss;
 extern struct tomoyo_domain_info tomoyo_kernel_domain;
-
-extern const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION];
-extern const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION];
-extern const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION];
-extern const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION];
-
-extern unsigned int tomoyo_quota_for_query;
-extern unsigned int tomoyo_query_memory_size;
+extern struct tomoyo_policy_namespace tomoyo_kernel_namespace;
+extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
+extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
 
 /********** Inlined functions. **********/
 
+/**
+ * tomoyo_read_lock - Take lock for protecting policy.
+ *
+ * Returns index number for tomoyo_read_unlock().
+ */
 static inline int tomoyo_read_lock(void)
 {
 	return srcu_read_lock(&tomoyo_ss);
 }
 
+/**
+ * tomoyo_read_unlock - Release lock for protecting policy.
+ *
+ * @idx: Index number returned by tomoyo_read_lock().
+ *
+ * Returns nothing.
+ */
 static inline void tomoyo_read_unlock(int idx)
 {
 	srcu_read_unlock(&tomoyo_ss, idx);
 }
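+
+/*
+ * Typical usage (sketch): readers bracket policy traversal with this SRCU
+ * pair, e.g.
+ *
+ *   int idx = tomoyo_read_lock();
+ *   ...walk tomoyo_domain_list with list_for_each_entry_rcu()...
+ *   tomoyo_read_unlock(idx);
+ */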
 
-/* strcmp() for "struct tomoyo_path_info" structure. */
+/**
+ * tomoyo_sys_getppid - Copy of getppid().
+ *
+ * Returns parent process's PID.
+ *
+ * Alpha does not have getppid() defined. To be able to build this module on
+ * Alpha, I have to copy getppid() from kernel/timer.c.
+ */
+static inline pid_t tomoyo_sys_getppid(void)
+{
+	pid_t pid;
+	rcu_read_lock();
+	pid = task_tgid_vnr(current->real_parent);
+	rcu_read_unlock();
+	return pid;
+}
+
+/**
+ * tomoyo_sys_getpid - Copy of getpid().
+ *
+ * Returns current thread's PID.
+ *
+ * Alpha does not have getpid() defined. To be able to build this module on
+ * Alpha, I have to copy getpid() from kernel/timer.c.
+ */
+static inline pid_t tomoyo_sys_getpid(void)
+{
+	return task_tgid_vnr(current);
+}
+
+/**
+ * tomoyo_pathcmp - strcmp() for "struct tomoyo_path_info" structure.
+ *
+ * @a: Pointer to "struct tomoyo_path_info".
+ * @b: Pointer to "struct tomoyo_path_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a,
 				  const struct tomoyo_path_info *b)
 {
@@ -932,77 +1003,155 @@
 }
 
 /**
- * tomoyo_valid - Check whether the character is a valid char.
+ * tomoyo_put_name - Drop reference on "struct tomoyo_name".
  *
- * @c: The character to check.
+ * @name: Pointer to "struct tomoyo_path_info". Maybe NULL.
  *
- * Returns true if @c is a valid character, false otherwise.
+ * Returns nothing.
  */
-static inline bool tomoyo_valid(const unsigned char c)
-{
-	return c > ' ' && c < 127;
-}
-
-/**
- * tomoyo_invalid - Check whether the character is an invalid char.
- *
- * @c: The character to check.
- *
- * Returns true if @c is an invalid character, false otherwise.
- */
-static inline bool tomoyo_invalid(const unsigned char c)
-{
-	return c && (c <= ' ' || c >= 127);
-}
-
 static inline void tomoyo_put_name(const struct tomoyo_path_info *name)
 {
 	if (name) {
 		struct tomoyo_name *ptr =
 			container_of(name, typeof(*ptr), entry);
-		atomic_dec(&ptr->users);
+		atomic_dec(&ptr->head.users);
 	}
 }
 
+/**
+ * tomoyo_put_condition - Drop reference on "struct tomoyo_condition".
+ *
+ * @cond: Pointer to "struct tomoyo_condition". Maybe NULL.
+ *
+ * Returns nothing.
+ */
+static inline void tomoyo_put_condition(struct tomoyo_condition *cond)
+{
+	if (cond)
+		atomic_dec(&cond->head.users);
+}
+
+/**
+ * tomoyo_put_group - Drop reference on "struct tomoyo_group".
+ *
+ * @group: Pointer to "struct tomoyo_group". Maybe NULL.
+ *
+ * Returns nothing.
+ */
 static inline void tomoyo_put_group(struct tomoyo_group *group)
 {
 	if (group)
-		atomic_dec(&group->users);
+		atomic_dec(&group->head.users);
 }
 
+/**
+ * tomoyo_domain - Get "struct tomoyo_domain_info" for current thread.
+ *
+ * Returns pointer to "struct tomoyo_domain_info" for current thread.
+ */
 static inline struct tomoyo_domain_info *tomoyo_domain(void)
 {
 	return current_cred()->security;
 }
 
+/**
+ * tomoyo_real_domain - Get "struct tomoyo_domain_info" for specified thread.
+ *
+ * @task: Pointer to "struct task_struct".
+ *
+ * Returns pointer to "struct tomoyo_security" for specified thread.
+ */
 static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct
 							    *task)
 {
 	return task_cred_xxx(task, security);
 }
 
-static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *p1,
-					   const struct tomoyo_acl_info *p2)
-{
-	return p1->type == p2->type;
-}
-
+/**
+ * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry.
+ *
+ * @a: Pointer to "struct tomoyo_name_union".
+ * @b: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static inline bool tomoyo_same_name_union
-(const struct tomoyo_name_union *p1, const struct tomoyo_name_union *p2)
+(const struct tomoyo_name_union *a, const struct tomoyo_name_union *b)
 {
-	return p1->filename == p2->filename && p1->group == p2->group &&
-		p1->is_group == p2->is_group;
+	return a->filename == b->filename && a->group == b->group;
 }
 
+/**
+ * tomoyo_same_number_union - Check for duplicated "struct tomoyo_number_union" entry.
+ *
+ * @a: Pointer to "struct tomoyo_number_union".
+ * @b: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static inline bool tomoyo_same_number_union
-(const struct tomoyo_number_union *p1, const struct tomoyo_number_union *p2)
+(const struct tomoyo_number_union *a, const struct tomoyo_number_union *b)
 {
-	return p1->values[0] == p2->values[0] && p1->values[1] == p2->values[1]
-		&& p1->group == p2->group && p1->min_type == p2->min_type &&
-		p1->max_type == p2->max_type && p1->is_group == p2->is_group;
+	return a->values[0] == b->values[0] && a->values[1] == b->values[1] &&
+		a->group == b->group && a->value_type[0] == b->value_type[0] &&
+		a->value_type[1] == b->value_type[1];
 }
 
 /**
+ * tomoyo_current_namespace - Get "struct tomoyo_policy_namespace" for current thread.
+ *
+ * Returns pointer to "struct tomoyo_policy_namespace" for current thread.
+ */
+static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void)
+{
+	return tomoyo_domain()->ns;
+}
+
+#if defined(CONFIG_SLOB)
+
+/**
+ * tomoyo_round2 - Round up to power of 2 for calculating memory usage.
+ *
+ * @size: Size to be rounded up.
+ *
+ * Returns @size.
+ *
+ * Since SLOB does not round up, this function simply returns @size.
+ */
+static inline int tomoyo_round2(size_t size)
+{
+	return size;
+}
+
+#else
+
+/**
+ * tomoyo_round2 - Round up to power of 2 for calculating memory usage.
+ *
+ * @size: Size to be rounded up.
+ *
+ * Returns rounded size.
+ *
+ * Strictly speaking, SLAB may be able to allocate (e.g.) 96 bytes instead of
+ * (e.g.) 128 bytes.
+ */
+static inline int tomoyo_round2(size_t size)
+{
+#if PAGE_SIZE == 4096
+	size_t bsize = 32;
+#else
+	size_t bsize = 64;
+#endif
+	if (!size)
+		return 0;
+	while (size > bsize)
+		bsize <<= 1;
+	return bsize;
+}
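+
+/*
+ * Worked example (assuming PAGE_SIZE == 4096, so bsize starts at 32):
+ * tomoyo_round2(0) == 0, tomoyo_round2(32) == 32, tomoyo_round2(33) == 64
+ * and tomoyo_round2(100) == 128.
+ */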
+
+#endif
+
+/**
  * list_for_each_cookie - iterate over a list with cookie.
  * @pos:        the &struct list_head to use as a loop cursor.
  * @head:       the head for your list.
diff --git a/security/tomoyo/condition.c b/security/tomoyo/condition.c
new file mode 100644
index 0000000..8a05f71
--- /dev/null
+++ b/security/tomoyo/condition.c
@@ -0,0 +1,1035 @@
+/*
+ * security/tomoyo/condition.c
+ *
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
+ */
+
+#include "common.h"
+#include <linux/slab.h>
+
+/* List of "struct tomoyo_condition". */
+LIST_HEAD(tomoyo_condition_list);
+
+/**
+ * tomoyo_argv - Check argv[] in "struct linux_binprm".
+ *
+ * @index:   Index number of @arg_ptr.
+ * @arg_ptr: Contents of argv[@index].
+ * @argc:    Length of @argv.
+ * @argv:    Pointer to "struct tomoyo_argv".
+ * @checked: Set to true if @argv[@index] was found.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_argv(const unsigned int index, const char *arg_ptr,
+			const int argc, const struct tomoyo_argv *argv,
+			u8 *checked)
+{
+	int i;
+	struct tomoyo_path_info arg;
+	arg.name = arg_ptr;
+	for (i = 0; i < argc; argv++, checked++, i++) {
+		bool result;
+		if (index != argv->index)
+			continue;
+		*checked = 1;
+		tomoyo_fill_path_info(&arg);
+		result = tomoyo_path_matches_pattern(&arg, argv->value);
+		if (argv->is_not)
+			result = !result;
+		if (!result)
+			return false;
+	}
+	return true;
+}
+
+/**
+ * tomoyo_envp - Check envp[] in "struct linux_binprm".
+ *
+ * @env_name:  The name of the environment variable.
+ * @env_value: The value of the environment variable.
+ * @envc:      Length of @envp.
+ * @envp:      Pointer to "struct tomoyo_envp".
+ * @checked:   Set to true if @envp[@env_name] was found.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_envp(const char *env_name, const char *env_value,
+			const int envc, const struct tomoyo_envp *envp,
+			u8 *checked)
+{
+	int i;
+	struct tomoyo_path_info name;
+	struct tomoyo_path_info value;
+	name.name = env_name;
+	tomoyo_fill_path_info(&name);
+	value.name = env_value;
+	tomoyo_fill_path_info(&value);
+	for (i = 0; i < envc; envp++, checked++, i++) {
+		bool result;
+		if (!tomoyo_path_matches_pattern(&name, envp->name))
+			continue;
+		*checked = 1;
+		if (envp->value) {
+			result = tomoyo_path_matches_pattern(&value,
+							     envp->value);
+			if (envp->is_not)
+				result = !result;
+		} else {
+			result = true;
+			if (!envp->is_not)
+				result = !result;
+		}
+		if (!result)
+			return false;
+	}
+	return true;
+}
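+
+/*
+ * Semantics sketch: a condition such as exec.envp["LD_PRELOAD"]=NULL
+ * (the variable name is only an example) matches when the variable is
+ * undefined; if it is found in envp[], a NULL @value with is_not == false
+ * makes the check fail.
+ */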
+
+/**
+ * tomoyo_scan_bprm - Scan "struct linux_binprm".
+ *
+ * @ee:   Pointer to "struct tomoyo_execve".
+ * @argc: Length of @argv.
+ * @argv: Pointer to "struct tomoyo_argv".
+ * @envc: Length of @envp.
+ * @envp: Pointer to "struct tomoyo_envp".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_scan_bprm(struct tomoyo_execve *ee,
+			     const u16 argc, const struct tomoyo_argv *argv,
+			     const u16 envc, const struct tomoyo_envp *envp)
+{
+	struct linux_binprm *bprm = ee->bprm;
+	struct tomoyo_page_dump *dump = &ee->dump;
+	char *arg_ptr = ee->tmp;
+	int arg_len = 0;
+	unsigned long pos = bprm->p;
+	int offset = pos % PAGE_SIZE;
+	int argv_count = bprm->argc;
+	int envp_count = bprm->envc;
+	bool result = true;
+	u8 local_checked[32];
+	u8 *checked;
+	if (argc + envc <= sizeof(local_checked)) {
+		checked = local_checked;
+		memset(local_checked, 0, sizeof(local_checked));
+	} else {
+		checked = kzalloc(argc + envc, GFP_NOFS);
+		if (!checked)
+			return false;
+	}
+	while (argv_count || envp_count) {
+		if (!tomoyo_dump_page(bprm, pos, dump)) {
+			result = false;
+			goto out;
+		}
+		pos += PAGE_SIZE - offset;
+		while (offset < PAGE_SIZE) {
+			/* Read. */
+			const char *kaddr = dump->data;
+			const unsigned char c = kaddr[offset++];
+			if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) {
+				if (c == '\\') {
+					arg_ptr[arg_len++] = '\\';
+					arg_ptr[arg_len++] = '\\';
+				} else if (c > ' ' && c < 127) {
+					arg_ptr[arg_len++] = c;
+				} else {
+					arg_ptr[arg_len++] = '\\';
+					arg_ptr[arg_len++] = (c >> 6) + '0';
+					arg_ptr[arg_len++] =
+						((c >> 3) & 7) + '0';
+					arg_ptr[arg_len++] = (c & 7) + '0';
+				}
+			} else {
+				arg_ptr[arg_len] = '\0';
+			}
+			if (c)
+				continue;
+			/* Check. */
+			if (argv_count) {
+				if (!tomoyo_argv(bprm->argc - argv_count,
+						 arg_ptr, argc, argv,
+						 checked)) {
+					result = false;
+					break;
+				}
+				argv_count--;
+			} else if (envp_count) {
+				char *cp = strchr(arg_ptr, '=');
+				if (cp) {
+					*cp = '\0';
+					if (!tomoyo_envp(arg_ptr, cp + 1,
+							 envc, envp,
+							 checked + argc)) {
+						result = false;
+						break;
+					}
+				}
+				envp_count--;
+			} else {
+				break;
+			}
+			arg_len = 0;
+		}
+		offset = 0;
+		if (!result)
+			break;
+	}
+out:
+	if (result) {
+		int i;
+		/* Check not-yet-checked entries. */
+		for (i = 0; i < argc; i++) {
+			if (checked[i])
+				continue;
+			/*
+			 * Return true only if all unchecked indexes in
+			 * bprm->argv[] are not matched.
+			 */
+			if (argv[i].is_not)
+				continue;
+			result = false;
+			break;
+		}
+		for (i = 0; i < envc; envp++, i++) {
+			if (checked[argc + i])
+				continue;
+			/*
+			 * Return true only if all unchecked environ variables
+			 * in bprm->envp[] are either undefined or not matched.
+			 */
+			if ((!envp->value && !envp->is_not) ||
+			    (envp->value && envp->is_not))
+				continue;
+			result = false;
+			break;
+		}
+	}
+	if (checked != local_checked)
+		kfree(checked);
+	return result;
+}
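+
+/*
+ * Design note: the "checked" map records which argv[]/envp[] condition
+ * entries were tested against an actual argument; a 32-byte on-stack
+ * buffer covers the common case, and kzalloc(GFP_NOFS) is used only when
+ * argc + envc exceeds it.
+ */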
+
+/**
+ * tomoyo_scan_exec_realpath - Check "exec.realpath" parameter of "struct tomoyo_condition".
+ *
+ * @file:  Pointer to "struct file".
+ * @ptr:   Pointer to "struct tomoyo_name_union".
+ * @match: True if "exec.realpath=", false if "exec.realpath!=".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_scan_exec_realpath(struct file *file,
+				      const struct tomoyo_name_union *ptr,
+				      const bool match)
+{
+	bool result;
+	struct tomoyo_path_info exe;
+	if (!file)
+		return false;
+	exe.name = tomoyo_realpath_from_path(&file->f_path);
+	if (!exe.name)
+		return false;
+	tomoyo_fill_path_info(&exe);
+	result = tomoyo_compare_name_union(&exe, ptr);
+	kfree(exe.name);
+	return result == match;
+}
+
+/**
+ * tomoyo_get_dqword - tomoyo_get_name() for a quoted string.
+ *
+ * @start: String to save.
+ *
+ * Returns pointer to "struct tomoyo_path_info" on success, NULL otherwise.
+ */
+static const struct tomoyo_path_info *tomoyo_get_dqword(char *start)
+{
+	char *cp = start + strlen(start) - 1;
+	if (cp == start || *start++ != '"' || *cp != '"')
+		return NULL;
+	*cp = '\0';
+	if (*start && !tomoyo_correct_word(start))
+		return NULL;
+	return tomoyo_get_name(start);
+}
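+
+/*
+ * For example, tomoyo_get_dqword() maps the token "\"/bin/cat\"" to the
+ * shared string "/bin/cat"; a token that is not fully enclosed in double
+ * quotes is rejected with NULL.
+ */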
+
+/**
+ * tomoyo_parse_name_union_quoted - Parse a quoted word.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr:   Pointer to "struct tomoyo_name_union".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_name_union_quoted(struct tomoyo_acl_param *param,
+					   struct tomoyo_name_union *ptr)
+{
+	char *filename = param->data;
+	if (*filename == '@')
+		return tomoyo_parse_name_union(param, ptr);
+	ptr->filename = tomoyo_get_dqword(filename);
+	return ptr->filename != NULL;
+}
+
+/**
+ * tomoyo_parse_argv - Parse an argv[] condition part.
+ *
+ * @left:  Lefthand value.
+ * @right: Righthand value.
+ * @argv:  Pointer to "struct tomoyo_argv".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_argv(char *left, char *right,
+			      struct tomoyo_argv *argv)
+{
+	if (tomoyo_parse_ulong(&argv->index, &left) !=
+	    TOMOYO_VALUE_TYPE_DECIMAL || *left++ != ']' || *left)
+		return false;
+	argv->value = tomoyo_get_dqword(right);
+	return argv->value != NULL;
+}
+
+/**
+ * tomoyo_parse_envp - Parse an envp[] condition part.
+ *
+ * @left:  Lefthand value.
+ * @right: Righthand value.
+ * @envp:  Pointer to "struct tomoyo_envp".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_envp(char *left, char *right,
+			      struct tomoyo_envp *envp)
+{
+	const struct tomoyo_path_info *name;
+	const struct tomoyo_path_info *value;
+	char *cp = left + strlen(left) - 1;
+	if (*cp-- != ']' || *cp != '"')
+		goto out;
+	*cp = '\0';
+	if (!tomoyo_correct_word(left))
+		goto out;
+	name = tomoyo_get_name(left);
+	if (!name)
+		goto out;
+	if (!strcmp(right, "NULL")) {
+		value = NULL;
+	} else {
+		value = tomoyo_get_dqword(right);
+		if (!value) {
+			tomoyo_put_name(name);
+			goto out;
+		}
+	}
+	envp->name = name;
+	envp->value = value;
+	return true;
+out:
+	return false;
+}
+
+/**
+ * tomoyo_same_condition - Check for duplicated "struct tomoyo_condition" entry.
+ *
+ * @a: Pointer to "struct tomoyo_condition".
+ * @b: Pointer to "struct tomoyo_condition".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_condition(const struct tomoyo_condition *a,
+					 const struct tomoyo_condition *b)
+{
+	return a->size == b->size && a->condc == b->condc &&
+		a->numbers_count == b->numbers_count &&
+		a->names_count == b->names_count &&
+		a->argc == b->argc && a->envc == b->envc &&
+		!memcmp(a + 1, b + 1, a->size - sizeof(*a));
+}
+
+/**
+ * tomoyo_condition_type - Get condition type.
+ *
+ * @word: Keyword string.
+ *
+ * Returns one of values in "enum tomoyo_conditions_index" on success,
+ * TOMOYO_MAX_CONDITION_KEYWORD otherwise.
+ */
+static u8 tomoyo_condition_type(const char *word)
+{
+	u8 i;
+	for (i = 0; i < TOMOYO_MAX_CONDITION_KEYWORD; i++) {
+		if (!strcmp(word, tomoyo_condition_keyword[i]))
+			break;
+	}
+	return i;
+}
+
+/* Define this to enable debug mode. */
+/* #define DEBUG_CONDITION */
+
+#ifdef DEBUG_CONDITION
+#define dprintk printk
+#else
+#define dprintk(...) do { } while (0)
+#endif
+
+/**
+ * tomoyo_commit_condition - Commit "struct tomoyo_condition".
+ *
+ * @entry: Pointer to "struct tomoyo_condition".
+ *
+ * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise.
+ *
+ * This function merges duplicated entries. This function returns NULL if
+ * @entry is not duplicated but memory quota for policy has exceeded.
+ */
+static struct tomoyo_condition *tomoyo_commit_condition
+(struct tomoyo_condition *entry)
+{
+	struct tomoyo_condition *ptr;
+	bool found = false;
+	if (mutex_lock_interruptible(&tomoyo_policy_lock)) {
+		dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__);
+		ptr = NULL;
+		found = true;
+		goto out;
+	}
+	list_for_each_entry_rcu(ptr, &tomoyo_condition_list, head.list) {
+		if (!tomoyo_same_condition(ptr, entry))
+			continue;
+		/* Same entry found. Share this entry. */
+		atomic_inc(&ptr->head.users);
+		found = true;
+		break;
+	}
+	if (!found) {
+		if (tomoyo_memory_ok(entry)) {
+			atomic_set(&entry->head.users, 1);
+			list_add_rcu(&entry->head.list,
+				     &tomoyo_condition_list);
+		} else {
+			found = true;
+			ptr = NULL;
+		}
+	}
+	mutex_unlock(&tomoyo_policy_lock);
+out:
+	if (found) {
+		tomoyo_del_condition(&entry->head.list);
+		kfree(entry);
+		entry = ptr;
+	}
+	return entry;
+}
+
+/**
+ * tomoyo_get_condition - Parse condition part.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise.
+ */
+struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param)
+{
+	struct tomoyo_condition *entry = NULL;
+	struct tomoyo_condition_element *condp = NULL;
+	struct tomoyo_number_union *numbers_p = NULL;
+	struct tomoyo_name_union *names_p = NULL;
+	struct tomoyo_argv *argv = NULL;
+	struct tomoyo_envp *envp = NULL;
+	struct tomoyo_condition e = { };
+	char * const start_of_string = param->data;
+	char * const end_of_string = start_of_string + strlen(start_of_string);
+	char *pos;
+rerun:
+	pos = start_of_string;
+	while (1) {
+		u8 left = -1;
+		u8 right = -1;
+		char *left_word = pos;
+		char *cp;
+		char *right_word;
+		bool is_not;
+		if (!*left_word)
+			break;
+		/*
+		 * Since left-hand condition does not allow use of "path_group"
+		 * or "number_group" and environment variable's names do not
+		 * accept '=', it is guaranteed that the original line consists
+		 * of one or more repetition of $left$operator$right blocks
+		 * where "$left is free from '=' and ' '" and "$operator is
+		 * either '=' or '!='" and "$right is free from ' '".
+		 * Therefore, we can reconstruct the original line at the end
+		 * of dry run even if we overwrite $operator with '\0'.
+		 */
+		cp = strchr(pos, ' ');
+		if (cp) {
+			*cp = '\0'; /* Will restore later. */
+			pos = cp + 1;
+		} else {
+			pos = "";
+		}
+		right_word = strchr(left_word, '=');
+		if (!right_word || right_word == left_word)
+			goto out;
+		is_not = *(right_word - 1) == '!';
+		if (is_not)
+			*(right_word++ - 1) = '\0'; /* Will restore later. */
+		else if (*(right_word + 1) != '=')
+			*right_word++ = '\0'; /* Will restore later. */
+		else
+			goto out;
+		dprintk(KERN_WARNING "%u: <%s>%s=<%s>\n", __LINE__, left_word,
+			is_not ? "!" : "", right_word);
+		if (!strncmp(left_word, "exec.argv[", 10)) {
+			if (!argv) {
+				e.argc++;
+				e.condc++;
+			} else {
+				e.argc--;
+				e.condc--;
+				left = TOMOYO_ARGV_ENTRY;
+				argv->is_not = is_not;
+				if (!tomoyo_parse_argv(left_word + 10,
+						       right_word, argv++))
+					goto out;
+			}
+			goto store_value;
+		}
+		if (!strncmp(left_word, "exec.envp[\"", 11)) {
+			if (!envp) {
+				e.envc++;
+				e.condc++;
+			} else {
+				e.envc--;
+				e.condc--;
+				left = TOMOYO_ENVP_ENTRY;
+				envp->is_not = is_not;
+				if (!tomoyo_parse_envp(left_word + 11,
+						       right_word, envp++))
+					goto out;
+			}
+			goto store_value;
+		}
+		left = tomoyo_condition_type(left_word);
+		dprintk(KERN_WARNING "%u: <%s> left=%u\n", __LINE__, left_word,
+			left);
+		if (left == TOMOYO_MAX_CONDITION_KEYWORD) {
+			if (!numbers_p) {
+				e.numbers_count++;
+			} else {
+				e.numbers_count--;
+				left = TOMOYO_NUMBER_UNION;
+				param->data = left_word;
+				if (*left_word == '@' ||
+				    !tomoyo_parse_number_union(param,
+							       numbers_p++))
+					goto out;
+			}
+		}
+		if (!condp)
+			e.condc++;
+		else
+			e.condc--;
+		if (left == TOMOYO_EXEC_REALPATH ||
+		    left == TOMOYO_SYMLINK_TARGET) {
+			if (!names_p) {
+				e.names_count++;
+			} else {
+				e.names_count--;
+				right = TOMOYO_NAME_UNION;
+				param->data = right_word;
+				if (!tomoyo_parse_name_union_quoted(param,
+								    names_p++))
+					goto out;
+			}
+			goto store_value;
+		}
+		right = tomoyo_condition_type(right_word);
+		if (right == TOMOYO_MAX_CONDITION_KEYWORD) {
+			if (!numbers_p) {
+				e.numbers_count++;
+			} else {
+				e.numbers_count--;
+				right = TOMOYO_NUMBER_UNION;
+				param->data = right_word;
+				if (!tomoyo_parse_number_union(param,
+							       numbers_p++))
+					goto out;
+			}
+		}
+store_value:
+		if (!condp) {
+			dprintk(KERN_WARNING "%u: dry_run left=%u right=%u "
+				"match=%u\n", __LINE__, left, right, !is_not);
+			continue;
+		}
+		condp->left = left;
+		condp->right = right;
+		condp->equals = !is_not;
+		dprintk(KERN_WARNING "%u: left=%u right=%u match=%u\n",
+			__LINE__, condp->left, condp->right,
+			condp->equals);
+		condp++;
+	}
+	dprintk(KERN_INFO "%u: cond=%u numbers=%u names=%u ac=%u ec=%u\n",
+		__LINE__, e.condc, e.numbers_count, e.names_count, e.argc,
+		e.envc);
+	if (entry) {
+		BUG_ON(e.names_count | e.numbers_count | e.argc | e.envc |
+		       e.condc);
+		return tomoyo_commit_condition(entry);
+	}
+	e.size = sizeof(*entry)
+		+ e.condc * sizeof(struct tomoyo_condition_element)
+		+ e.numbers_count * sizeof(struct tomoyo_number_union)
+		+ e.names_count * sizeof(struct tomoyo_name_union)
+		+ e.argc * sizeof(struct tomoyo_argv)
+		+ e.envc * sizeof(struct tomoyo_envp);
+	entry = kzalloc(e.size, GFP_NOFS);
+	if (!entry)
+		return NULL;
+	*entry = e;
+	condp = (struct tomoyo_condition_element *) (entry + 1);
+	numbers_p = (struct tomoyo_number_union *) (condp + e.condc);
+	names_p = (struct tomoyo_name_union *) (numbers_p + e.numbers_count);
+	argv = (struct tomoyo_argv *) (names_p + e.names_count);
+	envp = (struct tomoyo_envp *) (argv + e.argc);
+	{
+		bool flag = false;
+		for (pos = start_of_string; pos < end_of_string; pos++) {
+			if (*pos)
+				continue;
+			if (flag) /* Restore " ". */
+				*pos = ' ';
+			else if (*(pos + 1) == '=') /* Restore "!=". */
+				*pos = '!';
+			else /* Restore "=". */
+				*pos = '=';
+			flag = !flag;
+		}
+	}
+	goto rerun;
+out:
+	dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__);
+	if (entry) {
+		tomoyo_del_condition(&entry->head.list);
+		kfree(entry);
+	}
+	return NULL;
+}
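+
+/*
+ * Parsing sketch: for a condition part such as
+ *   task.uid=0 exec.argv[0]="/bin/cat"
+ * tomoyo_get_condition() walks the string twice. The first (dry run) pass
+ * only counts condc/numbers_count/names_count/argc/envc; the second pass
+ * fills the trailing arrays of the freshly allocated entry before
+ * tomoyo_commit_condition() deduplicates it.
+ */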
+
+/**
+ * tomoyo_get_attributes - Revalidate "struct inode".
+ *
+ * @obj: Pointer to "struct tomoyo_obj_info".
+ *
+ * Returns nothing.
+ */
+void tomoyo_get_attributes(struct tomoyo_obj_info *obj)
+{
+	u8 i;
+	struct dentry *dentry = NULL;
+
+	for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) {
+		struct inode *inode;
+		switch (i) {
+		case TOMOYO_PATH1:
+			dentry = obj->path1.dentry;
+			if (!dentry)
+				continue;
+			break;
+		case TOMOYO_PATH2:
+			dentry = obj->path2.dentry;
+			if (!dentry)
+				continue;
+			break;
+		default:
+			if (!dentry)
+				continue;
+			dentry = dget_parent(dentry);
+			break;
+		}
+		inode = dentry->d_inode;
+		if (inode) {
+			struct tomoyo_mini_stat *stat = &obj->stat[i];
+			stat->uid  = inode->i_uid;
+			stat->gid  = inode->i_gid;
+			stat->ino  = inode->i_ino;
+			stat->mode = inode->i_mode;
+			stat->dev  = inode->i_sb->s_dev;
+			stat->rdev = inode->i_rdev;
+			obj->stat_valid[i] = true;
+		}
+		if (i & 1) /* i == TOMOYO_PATH1_PARENT ||
+			      i == TOMOYO_PATH2_PARENT */
+			dput(dentry);
+	}
+}
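+
+/*
+ * Note: parent entries (odd indexes) take a temporary reference via
+ * dget_parent() and drop it with dput() once the attributes are copied,
+ * while the even indexes borrow the caller's path references.
+ */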
+
+/**
+ * tomoyo_condition - Check condition part.
+ *
+ * @r:    Pointer to "struct tomoyo_request_info".
+ * @cond: Pointer to "struct tomoyo_condition". Maybe NULL.
+ *
+ * Returns true on success, false otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+bool tomoyo_condition(struct tomoyo_request_info *r,
+		      const struct tomoyo_condition *cond)
+{
+	u32 i;
+	unsigned long min_v[2] = { 0, 0 };
+	unsigned long max_v[2] = { 0, 0 };
+	const struct tomoyo_condition_element *condp;
+	const struct tomoyo_number_union *numbers_p;
+	const struct tomoyo_name_union *names_p;
+	const struct tomoyo_argv *argv;
+	const struct tomoyo_envp *envp;
+	struct tomoyo_obj_info *obj;
+	u16 condc;
+	u16 argc;
+	u16 envc;
+	struct linux_binprm *bprm = NULL;
+	if (!cond)
+		return true;
+	condc = cond->condc;
+	argc = cond->argc;
+	envc = cond->envc;
+	obj = r->obj;
+	if (r->ee)
+		bprm = r->ee->bprm;
+	if (!bprm && (argc || envc))
+		return false;
+	condp = (struct tomoyo_condition_element *) (cond + 1);
+	numbers_p = (const struct tomoyo_number_union *) (condp + condc);
+	names_p = (const struct tomoyo_name_union *)
+		(numbers_p + cond->numbers_count);
+	argv = (const struct tomoyo_argv *) (names_p + cond->names_count);
+	envp = (const struct tomoyo_envp *) (argv + argc);
+	for (i = 0; i < condc; i++) {
+		const bool match = condp->equals;
+		const u8 left = condp->left;
+		const u8 right = condp->right;
+		bool is_bitop[2] = { false, false };
+		u8 j;
+		condp++;
+		/* Check argv[] and envp[] later. */
+		if (left == TOMOYO_ARGV_ENTRY || left == TOMOYO_ENVP_ENTRY)
+			continue;
+		/* Check string expressions. */
+		if (right == TOMOYO_NAME_UNION) {
+			const struct tomoyo_name_union *ptr = names_p++;
+			switch (left) {
+				struct tomoyo_path_info *symlink;
+				struct tomoyo_execve *ee;
+				struct file *file;
+			case TOMOYO_SYMLINK_TARGET:
+				symlink = obj ? obj->symlink_target : NULL;
+				if (!symlink ||
+				    !tomoyo_compare_name_union(symlink, ptr)
+				    == match)
+					goto out;
+				break;
+			case TOMOYO_EXEC_REALPATH:
+				ee = r->ee;
+				file = ee ? ee->bprm->file : NULL;
+				if (!tomoyo_scan_exec_realpath(file, ptr,
+							       match))
+					goto out;
+				break;
+			}
+			continue;
+		}
+		/* Check numeric or bit-op expressions. */
+		for (j = 0; j < 2; j++) {
+			const u8 index = j ? right : left;
+			unsigned long value = 0;
+			switch (index) {
+			case TOMOYO_TASK_UID:
+				value = current_uid();
+				break;
+			case TOMOYO_TASK_EUID:
+				value = current_euid();
+				break;
+			case TOMOYO_TASK_SUID:
+				value = current_suid();
+				break;
+			case TOMOYO_TASK_FSUID:
+				value = current_fsuid();
+				break;
+			case TOMOYO_TASK_GID:
+				value = current_gid();
+				break;
+			case TOMOYO_TASK_EGID:
+				value = current_egid();
+				break;
+			case TOMOYO_TASK_SGID:
+				value = current_sgid();
+				break;
+			case TOMOYO_TASK_FSGID:
+				value = current_fsgid();
+				break;
+			case TOMOYO_TASK_PID:
+				value = tomoyo_sys_getpid();
+				break;
+			case TOMOYO_TASK_PPID:
+				value = tomoyo_sys_getppid();
+				break;
+			case TOMOYO_TYPE_IS_SOCKET:
+				value = S_IFSOCK;
+				break;
+			case TOMOYO_TYPE_IS_SYMLINK:
+				value = S_IFLNK;
+				break;
+			case TOMOYO_TYPE_IS_FILE:
+				value = S_IFREG;
+				break;
+			case TOMOYO_TYPE_IS_BLOCK_DEV:
+				value = S_IFBLK;
+				break;
+			case TOMOYO_TYPE_IS_DIRECTORY:
+				value = S_IFDIR;
+				break;
+			case TOMOYO_TYPE_IS_CHAR_DEV:
+				value = S_IFCHR;
+				break;
+			case TOMOYO_TYPE_IS_FIFO:
+				value = S_IFIFO;
+				break;
+			case TOMOYO_MODE_SETUID:
+				value = S_ISUID;
+				break;
+			case TOMOYO_MODE_SETGID:
+				value = S_ISGID;
+				break;
+			case TOMOYO_MODE_STICKY:
+				value = S_ISVTX;
+				break;
+			case TOMOYO_MODE_OWNER_READ:
+				value = S_IRUSR;
+				break;
+			case TOMOYO_MODE_OWNER_WRITE:
+				value = S_IWUSR;
+				break;
+			case TOMOYO_MODE_OWNER_EXECUTE:
+				value = S_IXUSR;
+				break;
+			case TOMOYO_MODE_GROUP_READ:
+				value = S_IRGRP;
+				break;
+			case TOMOYO_MODE_GROUP_WRITE:
+				value = S_IWGRP;
+				break;
+			case TOMOYO_MODE_GROUP_EXECUTE:
+				value = S_IXGRP;
+				break;
+			case TOMOYO_MODE_OTHERS_READ:
+				value = S_IROTH;
+				break;
+			case TOMOYO_MODE_OTHERS_WRITE:
+				value = S_IWOTH;
+				break;
+			case TOMOYO_MODE_OTHERS_EXECUTE:
+				value = S_IXOTH;
+				break;
+			case TOMOYO_EXEC_ARGC:
+				if (!bprm)
+					goto out;
+				value = bprm->argc;
+				break;
+			case TOMOYO_EXEC_ENVC:
+				if (!bprm)
+					goto out;
+				value = bprm->envc;
+				break;
+			case TOMOYO_NUMBER_UNION:
+				/* Fetch values later. */
+				break;
+			default:
+				if (!obj)
+					goto out;
+				if (!obj->validate_done) {
+					tomoyo_get_attributes(obj);
+					obj->validate_done = true;
+				}
+				{
+					u8 stat_index;
+					struct tomoyo_mini_stat *stat;
+					switch (index) {
+					case TOMOYO_PATH1_UID:
+					case TOMOYO_PATH1_GID:
+					case TOMOYO_PATH1_INO:
+					case TOMOYO_PATH1_MAJOR:
+					case TOMOYO_PATH1_MINOR:
+					case TOMOYO_PATH1_TYPE:
+					case TOMOYO_PATH1_DEV_MAJOR:
+					case TOMOYO_PATH1_DEV_MINOR:
+					case TOMOYO_PATH1_PERM:
+						stat_index = TOMOYO_PATH1;
+						break;
+					case TOMOYO_PATH2_UID:
+					case TOMOYO_PATH2_GID:
+					case TOMOYO_PATH2_INO:
+					case TOMOYO_PATH2_MAJOR:
+					case TOMOYO_PATH2_MINOR:
+					case TOMOYO_PATH2_TYPE:
+					case TOMOYO_PATH2_DEV_MAJOR:
+					case TOMOYO_PATH2_DEV_MINOR:
+					case TOMOYO_PATH2_PERM:
+						stat_index = TOMOYO_PATH2;
+						break;
+					case TOMOYO_PATH1_PARENT_UID:
+					case TOMOYO_PATH1_PARENT_GID:
+					case TOMOYO_PATH1_PARENT_INO:
+					case TOMOYO_PATH1_PARENT_PERM:
+						stat_index =
+							TOMOYO_PATH1_PARENT;
+						break;
+					case TOMOYO_PATH2_PARENT_UID:
+					case TOMOYO_PATH2_PARENT_GID:
+					case TOMOYO_PATH2_PARENT_INO:
+					case TOMOYO_PATH2_PARENT_PERM:
+						stat_index =
+							TOMOYO_PATH2_PARENT;
+						break;
+					default:
+						goto out;
+					}
+					if (!obj->stat_valid[stat_index])
+						goto out;
+					stat = &obj->stat[stat_index];
+					switch (index) {
+					case TOMOYO_PATH1_UID:
+					case TOMOYO_PATH2_UID:
+					case TOMOYO_PATH1_PARENT_UID:
+					case TOMOYO_PATH2_PARENT_UID:
+						value = stat->uid;
+						break;
+					case TOMOYO_PATH1_GID:
+					case TOMOYO_PATH2_GID:
+					case TOMOYO_PATH1_PARENT_GID:
+					case TOMOYO_PATH2_PARENT_GID:
+						value = stat->gid;
+						break;
+					case TOMOYO_PATH1_INO:
+					case TOMOYO_PATH2_INO:
+					case TOMOYO_PATH1_PARENT_INO:
+					case TOMOYO_PATH2_PARENT_INO:
+						value = stat->ino;
+						break;
+					case TOMOYO_PATH1_MAJOR:
+					case TOMOYO_PATH2_MAJOR:
+						value = MAJOR(stat->dev);
+						break;
+					case TOMOYO_PATH1_MINOR:
+					case TOMOYO_PATH2_MINOR:
+						value = MINOR(stat->dev);
+						break;
+					case TOMOYO_PATH1_TYPE:
+					case TOMOYO_PATH2_TYPE:
+						value = stat->mode & S_IFMT;
+						break;
+					case TOMOYO_PATH1_DEV_MAJOR:
+					case TOMOYO_PATH2_DEV_MAJOR:
+						value = MAJOR(stat->rdev);
+						break;
+					case TOMOYO_PATH1_DEV_MINOR:
+					case TOMOYO_PATH2_DEV_MINOR:
+						value = MINOR(stat->rdev);
+						break;
+					case TOMOYO_PATH1_PERM:
+					case TOMOYO_PATH2_PERM:
+					case TOMOYO_PATH1_PARENT_PERM:
+					case TOMOYO_PATH2_PARENT_PERM:
+						value = stat->mode & S_IALLUGO;
+						break;
+					}
+				}
+				break;
+			}
+			max_v[j] = value;
+			min_v[j] = value;
+			switch (index) {
+			case TOMOYO_MODE_SETUID:
+			case TOMOYO_MODE_SETGID:
+			case TOMOYO_MODE_STICKY:
+			case TOMOYO_MODE_OWNER_READ:
+			case TOMOYO_MODE_OWNER_WRITE:
+			case TOMOYO_MODE_OWNER_EXECUTE:
+			case TOMOYO_MODE_GROUP_READ:
+			case TOMOYO_MODE_GROUP_WRITE:
+			case TOMOYO_MODE_GROUP_EXECUTE:
+			case TOMOYO_MODE_OTHERS_READ:
+			case TOMOYO_MODE_OTHERS_WRITE:
+			case TOMOYO_MODE_OTHERS_EXECUTE:
+				is_bitop[j] = true;
+			}
+		}
+		if (left == TOMOYO_NUMBER_UNION) {
+			/* Fetch values now. */
+			const struct tomoyo_number_union *ptr = numbers_p++;
+			min_v[0] = ptr->values[0];
+			max_v[0] = ptr->values[1];
+		}
+		if (right == TOMOYO_NUMBER_UNION) {
+			/* Fetch values now. */
+			const struct tomoyo_number_union *ptr = numbers_p++;
+			if (ptr->group) {
+				if (tomoyo_number_matches_group(min_v[0],
+								max_v[0],
+								ptr->group)
+				    == match)
+					continue;
+			} else {
+				if ((min_v[0] <= ptr->values[1] &&
+				     max_v[0] >= ptr->values[0]) == match)
+					continue;
+			}
+			goto out;
+		}
+		/*
+		 * Bit operation is valid only when counterpart value
+		 * represents permission.
+		 */
+		if (is_bitop[0] && is_bitop[1]) {
+			goto out;
+		} else if (is_bitop[0]) {
+			switch (right) {
+			case TOMOYO_PATH1_PERM:
+			case TOMOYO_PATH1_PARENT_PERM:
+			case TOMOYO_PATH2_PERM:
+			case TOMOYO_PATH2_PARENT_PERM:
+				if (!(max_v[0] & max_v[1]) == !match)
+					continue;
+			}
+			goto out;
+		} else if (is_bitop[1]) {
+			switch (left) {
+			case TOMOYO_PATH1_PERM:
+			case TOMOYO_PATH1_PARENT_PERM:
+			case TOMOYO_PATH2_PERM:
+			case TOMOYO_PATH2_PARENT_PERM:
+				if (!(max_v[0] & max_v[1]) == !match)
+					continue;
+			}
+			goto out;
+		}
+		/* Normal value range comparison. */
+		if ((min_v[0] <= max_v[1] && max_v[0] >= min_v[1]) == match)
+			continue;
+out:
+		return false;
+	}
+	/* Check argv[] and envp[] now. */
+	if (r->ee && (argc || envc))
+		return tomoyo_scan_bprm(r->ee, argc, argv, envc, envp);
+	return true;
+}
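The final comparison above reduces every operand to a [min, max] interval, with a constant acting as the degenerate interval [v, v], so "left=right" succeeds exactly when the intervals overlap. A small stand-alone sketch of that test (not kernel code):

#include <stdbool.h>

struct range { unsigned long min, max; };

/* "left=right" holds when the ranges overlap; passing equals == false
 * evaluates "left!=right" instead. */
static bool cond_matches(struct range left, struct range right, bool equals)
{
	const bool overlap = left.min <= right.max && left.max >= right.min;
	return overlap == equals;
}

For example, a hypothetical condition task.uid=100-200 with a current uid of 150 matches because [150,150] overlaps [100,200].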
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 3538840..cd0f92d 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/domain.c
  *
- * Domain transition functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
@@ -20,8 +18,7 @@
  *
  * @new_entry:       Pointer to "struct tomoyo_acl_info".
  * @size:            Size of @new_entry in bytes.
- * @is_delete:       True if it is a delete request.
- * @list:            Pointer to "struct list_head".
+ * @param:           Pointer to "struct tomoyo_acl_param".
  * @check_duplicate: Callback function to find duplicated entry.
  *
  * Returns 0 on success, negative value otherwise.
@@ -29,25 +26,26 @@
  * Caller holds tomoyo_read_lock().
  */
 int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
-			 bool is_delete, struct list_head *list,
+			 struct tomoyo_acl_param *param,
 			 bool (*check_duplicate) (const struct tomoyo_acl_head
 						  *,
 						  const struct tomoyo_acl_head
 						  *))
 {
-	int error = is_delete ? -ENOENT : -ENOMEM;
+	int error = param->is_delete ? -ENOENT : -ENOMEM;
 	struct tomoyo_acl_head *entry;
+	struct list_head *list = param->list;
 
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		return -ENOMEM;
 	list_for_each_entry_rcu(entry, list, list) {
 		if (!check_duplicate(entry, new_entry))
 			continue;
-		entry->is_deleted = is_delete;
+		entry->is_deleted = param->is_delete;
 		error = 0;
 		break;
 	}
-	if (error && !is_delete) {
+	if (error && !param->is_delete) {
 		entry = tomoyo_commit_ok(new_entry, size);
 		if (entry) {
 			list_add_tail_rcu(&entry->list, list);
@@ -59,12 +57,25 @@
 }
 
 /**
+ * tomoyo_same_acl_head - Check for duplicated "struct tomoyo_acl_info" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *a,
+					const struct tomoyo_acl_info *b)
+{
+	return a->type == b->type && a->cond == b->cond;
+}
+
+/**
  * tomoyo_update_domain - Update an entry for domain policy.
  *
  * @new_entry:       Pointer to "struct tomoyo_acl_info".
  * @size:            Size of @new_entry in bytes.
- * @is_delete:       True if it is a delete request.
- * @domain:          Pointer to "struct tomoyo_domain_info".
+ * @param:           Pointer to "struct tomoyo_acl_param".
  * @check_duplicate: Callback function to find duplicated entry.
  * @merge_duplicate: Callback function to merge duplicated entry.
  *
@@ -73,7 +84,7 @@
  * Caller holds tomoyo_read_lock().
  */
 int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
-			 bool is_delete, struct tomoyo_domain_info *domain,
+			 struct tomoyo_acl_param *param,
 			 bool (*check_duplicate) (const struct tomoyo_acl_info
 						  *,
 						  const struct tomoyo_acl_info
@@ -82,13 +93,21 @@
 						  struct tomoyo_acl_info *,
 						  const bool))
 {
+	const bool is_delete = param->is_delete;
 	int error = is_delete ? -ENOENT : -ENOMEM;
 	struct tomoyo_acl_info *entry;
+	struct list_head * const list = param->list;
 
+	if (param->data[0]) {
+		new_entry->cond = tomoyo_get_condition(param);
+		if (!new_entry->cond)
+			return -EINVAL;
+	}
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
-		return error;
-	list_for_each_entry_rcu(entry, &domain->acl_info_list, list) {
-		if (!check_duplicate(entry, new_entry))
+		goto out;
+	list_for_each_entry_rcu(entry, list, list) {
+		if (!tomoyo_same_acl_head(entry, new_entry) ||
+		    !check_duplicate(entry, new_entry))
 			continue;
 		if (merge_duplicate)
 			entry->is_deleted = merge_duplicate(entry, new_entry,
@@ -101,28 +120,50 @@
 	if (error && !is_delete) {
 		entry = tomoyo_commit_ok(new_entry, size);
 		if (entry) {
-			list_add_tail_rcu(&entry->list, &domain->acl_info_list);
+			list_add_tail_rcu(&entry->list, list);
 			error = 0;
 		}
 	}
 	mutex_unlock(&tomoyo_policy_lock);
+out:
+	tomoyo_put_condition(new_entry->cond);
 	return error;
 }
 
+/**
+ * tomoyo_check_acl - Do permission check.
+ *
+ * @r:           Pointer to "struct tomoyo_request_info".
+ * @check_entry: Callback function to check type specific parameters.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
 void tomoyo_check_acl(struct tomoyo_request_info *r,
 		      bool (*check_entry) (struct tomoyo_request_info *,
 					   const struct tomoyo_acl_info *))
 {
 	const struct tomoyo_domain_info *domain = r->domain;
 	struct tomoyo_acl_info *ptr;
+	bool retried = false;
+	const struct list_head *list = &domain->acl_info_list;
 
-	list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+retry:
+	list_for_each_entry_rcu(ptr, list, list) {
 		if (ptr->is_deleted || ptr->type != r->param_type)
 			continue;
-		if (check_entry(r, ptr)) {
-			r->granted = true;
-			return;
-		}
+		if (!check_entry(r, ptr))
+			continue;
+		if (!tomoyo_condition(r, ptr->cond))
+			continue;
+		r->granted = true;
+		return;
+	}
+	if (!retried) {
+		retried = true;
+		list = &domain->ns->acl_group[domain->group];
+		goto retry;
 	}
 	r->granted = false;
 }
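The retry above gives every domain two chances to grant a request: its own ACL list first, then the acl_group it belongs to. A simplified sketch of that two-list fallback, with illustrative array-based types standing in for the kernel's RCU lists:

#include <stdbool.h>
#include <stddef.h>

struct acl { int type; bool deleted; };

static bool scan(const struct acl *list, size_t n, int wanted,
		 bool (*check)(const struct acl *))
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (list[i].deleted || list[i].type != wanted)
			continue;
		if (check(&list[i]))
			return true;
	}
	return false;
}

/* Domain-local entries are consulted first; the shared group list is
 * only a fallback, mirroring the single retry in tomoyo_check_acl(). */
static bool check_acl(const struct acl *domain_list, size_t domain_n,
		      const struct acl *group_list, size_t group_n,
		      int wanted, bool (*check)(const struct acl *))
{
	return scan(domain_list, domain_n, wanted, check) ||
	       scan(group_list, group_n, wanted, check);
}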
@@ -130,24 +171,29 @@
 /* The list for "struct tomoyo_domain_info". */
 LIST_HEAD(tomoyo_domain_list);
 
-struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY];
-struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP];
-
 /**
  * tomoyo_last_word - Get last component of a domainname.
  *
- * @domainname: Domainname to check.
+ * @name: Domainname to check.
  *
  * Returns the last word of @domainname.
  */
 static const char *tomoyo_last_word(const char *name)
 {
-        const char *cp = strrchr(name, ' ');
-        if (cp)
-                return cp + 1;
-        return name;
+	const char *cp = strrchr(name, ' ');
+	if (cp)
+		return cp + 1;
+	return name;
 }
 
+/**
+ * tomoyo_same_transition_control - Check for duplicated "struct tomoyo_transition_control" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a,
 					   const struct tomoyo_acl_head *b)
 {
@@ -163,30 +209,36 @@
 }
 
 /**
- * tomoyo_update_transition_control_entry - Update "struct tomoyo_transition_control" list.
+ * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list.
  *
- * @domainname: The name of domain. Maybe NULL.
- * @program:    The name of program. Maybe NULL.
- * @type:       Type of transition.
- * @is_delete:  True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @type:  Type of this entry.
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_update_transition_control_entry(const char *domainname,
-						  const char *program,
-						  const u8 type,
-						  const bool is_delete)
+int tomoyo_write_transition_control(struct tomoyo_acl_param *param,
+				    const u8 type)
 {
 	struct tomoyo_transition_control e = { .type = type };
-	int error = is_delete ? -ENOENT : -ENOMEM;
-	if (program) {
+	int error = param->is_delete ? -ENOENT : -ENOMEM;
+	char *program = param->data;
+	char *domainname = strstr(program, " from ");
+	if (domainname) {
+		*domainname = '\0';
+		domainname += 6;
+	} else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
+		   type == TOMOYO_TRANSITION_CONTROL_KEEP) {
+		domainname = program;
+		program = NULL;
+	}
+	if (program && strcmp(program, "any")) {
 		if (!tomoyo_correct_path(program))
 			return -EINVAL;
 		e.program = tomoyo_get_name(program);
 		if (!e.program)
 			goto out;
 	}
-	if (domainname) {
+	if (domainname && strcmp(domainname, "any")) {
 		if (!tomoyo_correct_domain(domainname)) {
 			if (!tomoyo_correct_path(domainname))
 				goto out;
@@ -196,126 +248,136 @@
 		if (!e.domainname)
 			goto out;
 	}
-	error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-				     &tomoyo_policy_list
-				     [TOMOYO_ID_TRANSITION_CONTROL],
+	param->list = &param->ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
+	error = tomoyo_update_policy(&e.head, sizeof(e), param,
 				     tomoyo_same_transition_control);
- out:
+out:
 	tomoyo_put_name(e.domainname);
 	tomoyo_put_name(e.program);
 	return error;
 }
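For reference, the parsing above accepts a line of the form "<program> from <domainname>", where either side may be the literal "any", and the keep/no_keep variants also accept a bare domainname. Hypothetical policy lines, keyword spellings assumed, shown only to illustrate what the parser splits:

initialize_domain /usr/sbin/sshd from any
no_initialize_domain /bin/sh from /usr/sbin/httpd
keep_domain <kernel> /usr/sbin/sshd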
 
 /**
- * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list.
+ * tomoyo_scan_transition - Try to find specific domain transition type.
  *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- * @type:      Type of this entry.
+ * @list:       Pointer to "struct list_head".
+ * @domainname: The name of current domain.
+ * @program:    The name of requested program.
+ * @last_name:  The last component of @domainname.
+ * @type:       One of values in "enum tomoyo_transition_type".
  *
- * Returns 0 on success, negative value otherwise.
+ * Returns true if found one, false otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_transition_control(char *data, const bool is_delete,
-				    const u8 type)
+static inline bool tomoyo_scan_transition
+(const struct list_head *list, const struct tomoyo_path_info *domainname,
+ const struct tomoyo_path_info *program, const char *last_name,
+ const enum tomoyo_transition_type type)
 {
-	char *domainname = strstr(data, " from ");
-	if (domainname) {
-		*domainname = '\0';
-		domainname += 6;
-	} else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
-		   type == TOMOYO_TRANSITION_CONTROL_KEEP) {
-		domainname = data;
-		data = NULL;
+	const struct tomoyo_transition_control *ptr;
+	list_for_each_entry_rcu(ptr, list, head.list) {
+		if (ptr->head.is_deleted || ptr->type != type)
+			continue;
+		if (ptr->domainname) {
+			if (!ptr->is_last_name) {
+				if (ptr->domainname != domainname)
+					continue;
+			} else {
+				/*
+				 * Use direct strcmp() since this is
+				 * rarely used.
+				 */
+				if (strcmp(ptr->domainname->name, last_name))
+					continue;
+			}
+		}
+		if (ptr->program && tomoyo_pathcmp(ptr->program, program))
+			continue;
+		return true;
 	}
-	return tomoyo_update_transition_control_entry(domainname, data, type,
-						      is_delete);
+	return false;
 }
 
 /**
  * tomoyo_transition_type - Get domain transition type.
  *
- * @domainname: The name of domain.
- * @program:    The name of program.
+ * @ns:         Pointer to "struct tomoyo_policy_namespace".
+ * @domainname: The name of current domain.
+ * @program:    The name of requested program.
  *
- * Returns TOMOYO_TRANSITION_CONTROL_INITIALIZE if executing @program
- * reinitializes domain transition, TOMOYO_TRANSITION_CONTROL_KEEP if executing
- * @program suppresses domain transition, others otherwise.
+ * Returns TOMOYO_TRANSITION_CONTROL_TRANSIT if executing @program causes
+ * domain transition across namespaces, TOMOYO_TRANSITION_CONTROL_INITIALIZE if
+ * executing @program reinitializes domain transition within that namespace,
+ * TOMOYO_TRANSITION_CONTROL_KEEP if executing @program stays at @domainname,
+ * and other values otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static u8 tomoyo_transition_type(const struct tomoyo_path_info *domainname,
-				 const struct tomoyo_path_info *program)
+static enum tomoyo_transition_type tomoyo_transition_type
+(const struct tomoyo_policy_namespace *ns,
+ const struct tomoyo_path_info *domainname,
+ const struct tomoyo_path_info *program)
 {
-	const struct tomoyo_transition_control *ptr;
 	const char *last_name = tomoyo_last_word(domainname->name);
-	u8 type;
-	for (type = 0; type < TOMOYO_MAX_TRANSITION_TYPE; type++) {
- next:
-		list_for_each_entry_rcu(ptr, &tomoyo_policy_list
-					[TOMOYO_ID_TRANSITION_CONTROL],
-					head.list) {
-			if (ptr->head.is_deleted || ptr->type != type)
-				continue;
-			if (ptr->domainname) {
-				if (!ptr->is_last_name) {
-					if (ptr->domainname != domainname)
-						continue;
-				} else {
-					/*
-					 * Use direct strcmp() since this is
-					 * unlikely used.
-					 */
-					if (strcmp(ptr->domainname->name,
-						   last_name))
-						continue;
-				}
-			}
-			if (ptr->program &&
-			    tomoyo_pathcmp(ptr->program, program))
-				continue;
-			if (type == TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE) {
-				/*
-				 * Do not check for initialize_domain if
-				 * no_initialize_domain matched.
-				 */
-				type = TOMOYO_TRANSITION_CONTROL_NO_KEEP;
-				goto next;
-			}
-			goto done;
+	enum tomoyo_transition_type type = TOMOYO_TRANSITION_CONTROL_NO_RESET;
+	while (type < TOMOYO_MAX_TRANSITION_TYPE) {
+		const struct list_head * const list =
+			&ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
+		if (!tomoyo_scan_transition(list, domainname, program,
+					    last_name, type)) {
+			type++;
+			continue;
 		}
+		if (type != TOMOYO_TRANSITION_CONTROL_NO_RESET &&
+		    type != TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE)
+			break;
+		/*
+		 * Do not check for reset_domain if no_reset_domain matched.
+		 * Do not check for initialize_domain if no_initialize_domain
+		 * matched.
+		 */
+		type++;
+		type++;
 	}
- done:
 	return type;
 }
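The double increment above relies on the enum ordering: each "no_" control immediately precedes the control it suppresses, so a match on the "no_" entry jumps past its positive counterpart. A sketch of the assumed ordering (values illustrative):

enum transition_type {
	NO_RESET,	/* a match here skips the RESET check */
	RESET,
	NO_INITIALIZE,	/* a match here skips the INITIALIZE check */
	INITIALIZE,
	NO_KEEP,	/* a match here is returned as-is */
	KEEP,
	MAX_TRANSITION_TYPE
};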
 
+/**
+ * tomoyo_same_aggregator - Check for duplicated "struct tomoyo_aggregator" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_aggregator(const struct tomoyo_acl_head *a,
 				   const struct tomoyo_acl_head *b)
 {
-	const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1), head);
-	const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2), head);
+	const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1),
+							  head);
+	const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2),
+							  head);
 	return p1->original_name == p2->original_name &&
 		p1->aggregated_name == p2->aggregated_name;
 }
 
 /**
- * tomoyo_update_aggregator_entry - Update "struct tomoyo_aggregator" list.
+ * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
  *
- * @original_name:   The original program's name.
- * @aggregated_name: The program name to use.
- * @is_delete:       True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_aggregator_entry(const char *original_name,
-					  const char *aggregated_name,
-					  const bool is_delete)
+int tomoyo_write_aggregator(struct tomoyo_acl_param *param)
 {
 	struct tomoyo_aggregator e = { };
-	int error = is_delete ? -ENOENT : -ENOMEM;
-
-	if (!tomoyo_correct_path(original_name) ||
+	int error = param->is_delete ? -ENOENT : -ENOMEM;
+	const char *original_name = tomoyo_read_token(param);
+	const char *aggregated_name = tomoyo_read_token(param);
+	if (!tomoyo_correct_word(original_name) ||
 	    !tomoyo_correct_path(aggregated_name))
 		return -EINVAL;
 	e.original_name = tomoyo_get_name(original_name);
@@ -323,83 +385,181 @@
 	if (!e.original_name || !e.aggregated_name ||
 	    e.aggregated_name->is_patterned) /* No patterns allowed. */
 		goto out;
-	error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-				     &tomoyo_policy_list[TOMOYO_ID_AGGREGATOR],
+	param->list = &param->ns->policy_list[TOMOYO_ID_AGGREGATOR];
+	error = tomoyo_update_policy(&e.head, sizeof(e), param,
 				     tomoyo_same_aggregator);
- out:
+out:
 	tomoyo_put_name(e.original_name);
 	tomoyo_put_name(e.aggregated_name);
 	return error;
 }
 
 /**
- * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
+ * tomoyo_find_namespace - Find specified namespace.
  *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
+ * @name: Name of namespace to find.
+ * @len:  Length of @name.
  *
- * Returns 0 on success, negative value otherwise.
+ * Returns pointer to "struct tomoyo_policy_namespace" if found,
+ * NULL otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_aggregator(char *data, const bool is_delete)
+static struct tomoyo_policy_namespace *tomoyo_find_namespace
+(const char *name, const unsigned int len)
 {
-	char *cp = strchr(data, ' ');
-
-	if (!cp)
-		return -EINVAL;
-	*cp++ = '\0';
-	return tomoyo_update_aggregator_entry(data, cp, is_delete);
+	struct tomoyo_policy_namespace *ns;
+	list_for_each_entry(ns, &tomoyo_namespace_list, namespace_list) {
+		if (strncmp(name, ns->name, len) ||
+		    (name[len] && name[len] != ' '))
+			continue;
+		return ns;
+	}
+	return NULL;
 }
 
 /**
- * tomoyo_assign_domain - Create a domain.
+ * tomoyo_assign_namespace - Create a new namespace.
+ *
+ * @domainname: Name of namespace to create.
+ *
+ * Returns pointer to "struct tomoyo_policy_namespace" on success,
+ * NULL otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+struct tomoyo_policy_namespace *tomoyo_assign_namespace(const char *domainname)
+{
+	struct tomoyo_policy_namespace *ptr;
+	struct tomoyo_policy_namespace *entry;
+	const char *cp = domainname;
+	unsigned int len = 0;
+	while (*cp && *cp++ != ' ')
+		len++;
+	ptr = tomoyo_find_namespace(domainname, len);
+	if (ptr)
+		return ptr;
+	if (len >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_domain_def(domainname))
+		return NULL;
+	entry = kzalloc(sizeof(*entry) + len + 1, GFP_NOFS);
+	if (!entry)
+		return NULL;
+	if (mutex_lock_interruptible(&tomoyo_policy_lock))
+		goto out;
+	ptr = tomoyo_find_namespace(domainname, len);
+	if (!ptr && tomoyo_memory_ok(entry)) {
+		char *name = (char *) (entry + 1);
+		ptr = entry;
+		memmove(name, domainname, len);
+		name[len] = '\0';
+		entry->name = name;
+		tomoyo_init_policy_namespace(entry);
+		entry = NULL;
+	}
+	mutex_unlock(&tomoyo_policy_lock);
+out:
+	kfree(entry);
+	return ptr;
+}
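The function above follows the classic find-or-create pattern: look up without the lock, allocate optimistically, then re-check under the lock so that a concurrent creator wins and the loser frees its allocation. A user-space sketch of the same pattern (a pthread mutex standing in for tomoyo_policy_lock, and no memory-quota check):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct ns { struct ns *next; char name[]; };

static struct ns *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static struct ns *find(const char *name)
{
	struct ns *p;

	for (p = head; p; p = p->next)
		if (!strcmp(p->name, name))
			return p;
	return NULL;
}

static struct ns *assign(const char *name)
{
	struct ns *p = find(name);
	struct ns *entry;

	if (p)
		return p;
	entry = calloc(1, sizeof(*entry) + strlen(name) + 1);
	if (!entry)
		return NULL;
	pthread_mutex_lock(&lock);
	p = find(name);		/* re-check: someone may have raced us */
	if (!p) {
		strcpy(entry->name, name);
		entry->next = head;
		head = entry;
		p = entry;
		entry = NULL;	/* ownership transferred to the list */
	}
	pthread_mutex_unlock(&lock);
	free(entry);		/* no-op if we won the race */
	return p;
}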
+
+/**
+ * tomoyo_namespace_jump - Check for namespace jump.
+ *
+ * @domainname: Name of domain.
+ *
+ * Returns true if namespace differs, false otherwise.
+ */
+static bool tomoyo_namespace_jump(const char *domainname)
+{
+	const char *namespace = tomoyo_current_namespace()->name;
+	const int len = strlen(namespace);
+	return strncmp(domainname, namespace, len) ||
+		(domainname[len] && domainname[len] != ' ');
+}
+
+/**
+ * tomoyo_assign_domain - Create a domain or a namespace.
  *
  * @domainname: The name of domain.
- * @profile:    Profile number to assign if the domain was newly created.
+ * @transit:    True if transit to domain found or created.
  *
  * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
 struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
-						const u8 profile)
+						const bool transit)
 {
-	struct tomoyo_domain_info *entry;
-	struct tomoyo_domain_info *domain = NULL;
-	const struct tomoyo_path_info *saved_domainname;
-	bool found = false;
-
-	if (!tomoyo_correct_domain(domainname))
+	struct tomoyo_domain_info e = { };
+	struct tomoyo_domain_info *entry = tomoyo_find_domain(domainname);
+	bool created = false;
+	if (entry) {
+		if (transit) {
+			/*
+			 * Since namespace is created at runtime, profiles may
+			 * not be created by the moment the process transits to
+			 * that domain. Do not perform domain transition if
+			 * profile for that domain is not yet created.
+			 */
+			if (!entry->ns->profile_ptr[entry->profile])
+				return NULL;
+		}
+		return entry;
+	}
+	/* Requested domain does not exist. */
+	/* Don't create requested domain if domainname is invalid. */
+	if (strlen(domainname) >= TOMOYO_EXEC_TMPSIZE - 10 ||
+	    !tomoyo_correct_domain(domainname))
 		return NULL;
-	saved_domainname = tomoyo_get_name(domainname);
-	if (!saved_domainname)
+	/*
+	 * Since definition of profiles and acl_groups may differ across
+	 * namespaces, do not inherit "use_profile" and "use_group" settings
+	 * by automatically creating requested domain upon domain transition.
+	 */
+	if (transit && tomoyo_namespace_jump(domainname))
 		return NULL;
-	entry = kzalloc(sizeof(*entry), GFP_NOFS);
+	e.ns = tomoyo_assign_namespace(domainname);
+	if (!e.ns)
+		return NULL;
+	/*
+	 * "use_profile" and "use_group" settings for automatically created
+	 * domains are inherited from current domain. These are 0 for manually
+	 * created domains.
+	 */
+	if (transit) {
+		const struct tomoyo_domain_info *domain = tomoyo_domain();
+		e.profile = domain->profile;
+		e.group = domain->group;
+	}
+	e.domainname = tomoyo_get_name(domainname);
+	if (!e.domainname)
+		return NULL;
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		goto out;
-	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
-		if (domain->is_deleted ||
-		    tomoyo_pathcmp(saved_domainname, domain->domainname))
-			continue;
-		found = true;
-		break;
-	}
-	if (!found && tomoyo_memory_ok(entry)) {
-		INIT_LIST_HEAD(&entry->acl_info_list);
-		entry->domainname = saved_domainname;
-		saved_domainname = NULL;
-		entry->profile = profile;
-		list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
-		domain = entry;
-		entry = NULL;
-		found = true;
+	entry = tomoyo_find_domain(domainname);
+	if (!entry) {
+		entry = tomoyo_commit_ok(&e, sizeof(e));
+		if (entry) {
+			INIT_LIST_HEAD(&entry->acl_info_list);
+			list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
+			created = true;
+		}
 	}
 	mutex_unlock(&tomoyo_policy_lock);
- out:
-	tomoyo_put_name(saved_domainname);
-	kfree(entry);
-	return found ? domain : NULL;
+out:
+	tomoyo_put_name(e.domainname);
+	if (entry && transit) {
+		if (created) {
+			struct tomoyo_request_info r;
+			tomoyo_init_request_info(&r, entry,
+						 TOMOYO_MAC_FILE_EXECUTE);
+			r.granted = false;
+			tomoyo_write_log(&r, "use_profile %u\n",
+					 entry->profile);
+			tomoyo_write_log(&r, "use_group %u\n", entry->group);
+		}
+	}
+	return entry;
 }
 
 /**
@@ -413,22 +573,27 @@
  */
 int tomoyo_find_next_domain(struct linux_binprm *bprm)
 {
-	struct tomoyo_request_info r;
-	char *tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
 	struct tomoyo_domain_info *old_domain = tomoyo_domain();
 	struct tomoyo_domain_info *domain = NULL;
 	const char *original_name = bprm->filename;
-	u8 mode;
-	bool is_enforce;
 	int retval = -ENOMEM;
 	bool need_kfree = false;
+	bool reject_on_transition_failure = false;
 	struct tomoyo_path_info rn = { }; /* real name */
-
-	mode = tomoyo_init_request_info(&r, NULL, TOMOYO_MAC_FILE_EXECUTE);
-	is_enforce = (mode == TOMOYO_CONFIG_ENFORCING);
-	if (!tmp)
-		goto out;
-
+	struct tomoyo_execve *ee = kzalloc(sizeof(*ee), GFP_NOFS);
+	if (!ee)
+		return -ENOMEM;
+	ee->tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
+	if (!ee->tmp) {
+		kfree(ee);
+		return -ENOMEM;
+	}
+	/* ee->dump.data is allocated by tomoyo_dump_page(). */
+	tomoyo_init_request_info(&ee->r, NULL, TOMOYO_MAC_FILE_EXECUTE);
+	ee->r.ee = ee;
+	ee->bprm = bprm;
+	ee->r.obj = &ee->obj;
+	ee->obj.path1 = bprm->file->f_path;
  retry:
 	if (need_kfree) {
 		kfree(rn.name);
@@ -445,8 +610,10 @@
 	/* Check 'aggregator' directive. */
 	{
 		struct tomoyo_aggregator *ptr;
-		list_for_each_entry_rcu(ptr, &tomoyo_policy_list
-					[TOMOYO_ID_AGGREGATOR], head.list) {
+		struct list_head *list =
+			&old_domain->ns->policy_list[TOMOYO_ID_AGGREGATOR];
+		list_for_each_entry_rcu(ptr, list, head.list) {
 			if (ptr->head.is_deleted ||
 			    !tomoyo_path_matches_pattern(&rn,
 							 ptr->original_name))
@@ -460,7 +627,7 @@
 	}
 
 	/* Check execute permission. */
-	retval = tomoyo_path_permission(&r, TOMOYO_TYPE_EXECUTE, &rn);
+	retval = tomoyo_path_permission(&ee->r, TOMOYO_TYPE_EXECUTE, &rn);
 	if (retval == TOMOYO_RETRY_REQUEST)
 		goto retry;
 	if (retval < 0)
@@ -471,20 +638,30 @@
 	 * wildcard) rather than the pathname passed to execve()
 	 * (which never contains wildcard).
 	 */
-	if (r.param.path.matched_path) {
+	if (ee->r.param.path.matched_path) {
 		if (need_kfree)
 			kfree(rn.name);
 		need_kfree = false;
 		/* This is OK because it is read only. */
-		rn = *r.param.path.matched_path;
+		rn = *ee->r.param.path.matched_path;
 	}
 
 	/* Calculate domain to transit to. */
-	switch (tomoyo_transition_type(old_domain->domainname, &rn)) {
+	switch (tomoyo_transition_type(old_domain->ns, old_domain->domainname,
+				       &rn)) {
+	case TOMOYO_TRANSITION_CONTROL_RESET:
+		/* Transit to the root of specified namespace. */
+		snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", rn.name);
+		/*
+		 * Make do_execve() fail if domain transition across namespaces
+		 * has failed.
+		 */
+		reject_on_transition_failure = true;
+		break;
 	case TOMOYO_TRANSITION_CONTROL_INITIALIZE:
-		/* Transit to the child of tomoyo_kernel_domain domain. */
-		snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, TOMOYO_ROOT_NAME " "
-			 "%s", rn.name);
+		/* Transit to the child of current namespace's root. */
+		snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
+			 old_domain->ns->name, rn.name);
 		break;
 	case TOMOYO_TRANSITION_CONTROL_KEEP:
 		/* Keep current domain. */
@@ -502,33 +679,32 @@
 			domain = old_domain;
 		} else {
 			/* Normal domain transition. */
-			snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
+			snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
 				 old_domain->domainname->name, rn.name);
 		}
 		break;
 	}
-	if (domain || strlen(tmp) >= TOMOYO_EXEC_TMPSIZE - 10)
-		goto done;
-	domain = tomoyo_find_domain(tmp);
+	if (!domain)
+		domain = tomoyo_assign_domain(ee->tmp, true);
 	if (domain)
-		goto done;
-	if (is_enforce) {
-		int error = tomoyo_supervisor(&r, "# wants to create domain\n"
-					      "%s\n", tmp);
-		if (error == TOMOYO_RETRY_REQUEST)
-			goto retry;
-		if (error < 0)
-			goto done;
+		retval = 0;
+	else if (reject_on_transition_failure) {
+		printk(KERN_WARNING "ERROR: Domain '%s' not ready.\n",
+		       ee->tmp);
+		retval = -ENOMEM;
+	} else if (ee->r.mode == TOMOYO_CONFIG_ENFORCING)
+		retval = -ENOMEM;
+	else {
+		retval = 0;
+		if (!old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED]) {
+			old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED] = true;
+			ee->r.granted = false;
+			tomoyo_write_log(&ee->r, "%s", tomoyo_dif
+					 [TOMOYO_DIF_TRANSITION_FAILED]);
+			printk(KERN_WARNING
+			       "ERROR: Domain '%s' not defined.\n", ee->tmp);
+		}
 	}
-	domain = tomoyo_assign_domain(tmp, old_domain->profile);
- done:
-	if (domain)
-		goto out;
-	printk(KERN_WARNING "TOMOYO-ERROR: Domain '%s' not defined.\n", tmp);
-	if (is_enforce)
-		retval = -EPERM;
-	else
-		old_domain->transition_failed = true;
  out:
 	if (!domain)
 		domain = old_domain;
@@ -537,6 +713,54 @@
 	bprm->cred->security = domain;
 	if (need_kfree)
 		kfree(rn.name);
-	kfree(tmp);
+	kfree(ee->tmp);
+	kfree(ee->dump.data);
+	kfree(ee);
 	return retval;
 }
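To make the snprintf() compositions in the switch above concrete, suppose the current domain is "<kernel> /usr/sbin/sshd" and the program being executed is /bin/bash (all values hypothetical):

normal transition   ->  <kernel> /usr/sbin/sshd /bin/bash
initialize_domain   ->  <kernel> /bin/bash
reset_domain        ->  </bin/bash>   (root of the named namespace)
keep_domain         ->  <kernel> /usr/sbin/sshd   (unchanged)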
+
+/**
+ * tomoyo_dump_page - Dump a page to buffer.
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ * @pos:  Location to dump.
+ * @dump: Pointer to "struct tomoyo_page_dump".
+ *
+ * Returns true on success, false otherwise.
+ */
+bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
+		      struct tomoyo_page_dump *dump)
+{
+	struct page *page;
+	/* dump->data is released by tomoyo_finish_execve(). */
+	if (!dump->data) {
+		dump->data = kzalloc(PAGE_SIZE, GFP_NOFS);
+		if (!dump->data)
+			return false;
+	}
+	/* Same as get_arg_page(bprm, pos, 0) in fs/exec.c */
+#ifdef CONFIG_MMU
+	if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0)
+		return false;
+#else
+	page = bprm->page[pos / PAGE_SIZE];
+#endif
+	if (page != dump->page) {
+		const unsigned int offset = pos % PAGE_SIZE;
+		/*
+		 * kmap()/kunmap() could be used here, but
+		 * remove_arg_zero() uses kmap_atomic()/kunmap_atomic(),
+		 * so this code does the same.
+		 */
+		char *kaddr = kmap_atomic(page, KM_USER0);
+		dump->page = page;
+		memcpy(dump->data + offset, kaddr + offset,
+		       PAGE_SIZE - offset);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+	/* Same as put_arg_page(page) in fs/exec.c */
+#ifdef CONFIG_MMU
+	put_page(page);
+#endif
+	return true;
+}
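A user-space analogue of the caching above may help: the dump remembers which page its buffer holds, so repeated reads from the same page skip the expensive fetch. fetch_page() below stands in for get_user_pages() plus kmap_atomic(); everything here is illustrative, not the kernel API:

#include <stdbool.h>
#include <string.h>

#define PAGE_SIZE 4096UL

struct page_dump {
	bool valid;
	unsigned long cached;	/* page number currently held in data[] */
	char data[PAGE_SIZE];
};

/* Fake page source so the sketch is self-contained. */
static bool fetch_page(unsigned long pageno, char *out)
{
	memset(out, 'a' + (pageno % 26), PAGE_SIZE);
	return true;
}

static bool dump_at(unsigned long pos, struct page_dump *dump)
{
	const unsigned long pageno = pos / PAGE_SIZE;

	if (dump->valid && dump->cached == pageno)
		return true;	/* same page as last time: cache hit */
	if (!fetch_page(pageno, dump->data))
		return false;
	dump->cached = pageno;
	dump->valid = true;
	return true;
}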
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index d64e8ec..743c35f 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -1,80 +1,51 @@
 /*
  * security/tomoyo/file.c
  *
- * Pathname restriction functions.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
 #include <linux/slab.h>
 
-/* Keyword array for operations with one pathname. */
-const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = {
-	[TOMOYO_TYPE_READ_WRITE] = "read/write",
-	[TOMOYO_TYPE_EXECUTE]    = "execute",
-	[TOMOYO_TYPE_READ]       = "read",
-	[TOMOYO_TYPE_WRITE]      = "write",
-	[TOMOYO_TYPE_UNLINK]     = "unlink",
-	[TOMOYO_TYPE_RMDIR]      = "rmdir",
-	[TOMOYO_TYPE_TRUNCATE]   = "truncate",
-	[TOMOYO_TYPE_SYMLINK]    = "symlink",
-	[TOMOYO_TYPE_REWRITE]    = "rewrite",
-	[TOMOYO_TYPE_CHROOT]     = "chroot",
-	[TOMOYO_TYPE_UMOUNT]     = "unmount",
-};
-
-/* Keyword array for operations with one pathname and three numbers. */
-const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION] = {
-	[TOMOYO_TYPE_MKBLOCK]    = "mkblock",
-	[TOMOYO_TYPE_MKCHAR]     = "mkchar",
-};
-
-/* Keyword array for operations with two pathnames. */
-const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION] = {
-	[TOMOYO_TYPE_LINK]       = "link",
-	[TOMOYO_TYPE_RENAME]     = "rename",
-	[TOMOYO_TYPE_PIVOT_ROOT] = "pivot_root",
-};
-
-/* Keyword array for operations with one pathname and one number. */
-const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
-	[TOMOYO_TYPE_CREATE]     = "create",
-	[TOMOYO_TYPE_MKDIR]      = "mkdir",
-	[TOMOYO_TYPE_MKFIFO]     = "mkfifo",
-	[TOMOYO_TYPE_MKSOCK]     = "mksock",
-	[TOMOYO_TYPE_IOCTL]      = "ioctl",
-	[TOMOYO_TYPE_CHMOD]      = "chmod",
-	[TOMOYO_TYPE_CHOWN]      = "chown",
-	[TOMOYO_TYPE_CHGRP]      = "chgrp",
-};
-
+/*
+ * Mapping table from "enum tomoyo_path_acl_index" to "enum tomoyo_mac_index".
+ */
 static const u8 tomoyo_p2mac[TOMOYO_MAX_PATH_OPERATION] = {
-	[TOMOYO_TYPE_READ_WRITE] = TOMOYO_MAC_FILE_OPEN,
 	[TOMOYO_TYPE_EXECUTE]    = TOMOYO_MAC_FILE_EXECUTE,
 	[TOMOYO_TYPE_READ]       = TOMOYO_MAC_FILE_OPEN,
 	[TOMOYO_TYPE_WRITE]      = TOMOYO_MAC_FILE_OPEN,
+	[TOMOYO_TYPE_APPEND]     = TOMOYO_MAC_FILE_OPEN,
 	[TOMOYO_TYPE_UNLINK]     = TOMOYO_MAC_FILE_UNLINK,
+	[TOMOYO_TYPE_GETATTR]    = TOMOYO_MAC_FILE_GETATTR,
 	[TOMOYO_TYPE_RMDIR]      = TOMOYO_MAC_FILE_RMDIR,
 	[TOMOYO_TYPE_TRUNCATE]   = TOMOYO_MAC_FILE_TRUNCATE,
 	[TOMOYO_TYPE_SYMLINK]    = TOMOYO_MAC_FILE_SYMLINK,
-	[TOMOYO_TYPE_REWRITE]    = TOMOYO_MAC_FILE_REWRITE,
 	[TOMOYO_TYPE_CHROOT]     = TOMOYO_MAC_FILE_CHROOT,
 	[TOMOYO_TYPE_UMOUNT]     = TOMOYO_MAC_FILE_UMOUNT,
 };
 
-static const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_mkdev_acl_index" to "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
 	[TOMOYO_TYPE_MKBLOCK] = TOMOYO_MAC_FILE_MKBLOCK,
 	[TOMOYO_TYPE_MKCHAR]  = TOMOYO_MAC_FILE_MKCHAR,
 };
 
-static const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_path2_acl_index" to "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
 	[TOMOYO_TYPE_LINK]       = TOMOYO_MAC_FILE_LINK,
 	[TOMOYO_TYPE_RENAME]     = TOMOYO_MAC_FILE_RENAME,
 	[TOMOYO_TYPE_PIVOT_ROOT] = TOMOYO_MAC_FILE_PIVOT_ROOT,
 };
 
-static const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_path_number_acl_index" to
+ * "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
 	[TOMOYO_TYPE_CREATE] = TOMOYO_MAC_FILE_CREATE,
 	[TOMOYO_TYPE_MKDIR]  = TOMOYO_MAC_FILE_MKDIR,
 	[TOMOYO_TYPE_MKFIFO] = TOMOYO_MAC_FILE_MKFIFO,
@@ -85,41 +56,76 @@
 	[TOMOYO_TYPE_CHGRP]  = TOMOYO_MAC_FILE_CHGRP,
 };
 
+/**
+ * tomoyo_put_name_union - Drop reference on "struct tomoyo_name_union".
+ *
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns nothing.
+ */
 void tomoyo_put_name_union(struct tomoyo_name_union *ptr)
 {
-	if (!ptr)
-		return;
-	if (ptr->is_group)
-		tomoyo_put_group(ptr->group);
-	else
-		tomoyo_put_name(ptr->filename);
+	tomoyo_put_group(ptr->group);
+	tomoyo_put_name(ptr->filename);
 }
 
+/**
+ * tomoyo_compare_name_union - Check whether a name matches "struct tomoyo_name_union" or not.
+ *
+ * @name: Pointer to "struct tomoyo_path_info".
+ * @ptr:  Pointer to "struct tomoyo_name_union".
+ *
+ * Returns "struct tomoyo_path_info" if @name matches @ptr, NULL otherwise.
+ */
 const struct tomoyo_path_info *
 tomoyo_compare_name_union(const struct tomoyo_path_info *name,
 			  const struct tomoyo_name_union *ptr)
 {
-	if (ptr->is_group)
+	if (ptr->group)
 		return tomoyo_path_matches_group(name, ptr->group);
 	if (tomoyo_path_matches_pattern(name, ptr->filename))
 		return ptr->filename;
 	return NULL;
 }
 
+/**
+ * tomoyo_put_number_union - Drop reference on "struct tomoyo_number_union".
+ *
+ * @ptr: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
 void tomoyo_put_number_union(struct tomoyo_number_union *ptr)
 {
-	if (ptr && ptr->is_group)
-		tomoyo_put_group(ptr->group);
+	tomoyo_put_group(ptr->group);
 }
 
+/**
+ * tomoyo_compare_number_union - Check whether a value matches "struct tomoyo_number_union" or not.
+ *
+ * @value: Number to check.
+ * @ptr:   Pointer to "struct tomoyo_number_union".
+ *
+ * Returns true if @value matches @ptr, false otherwise.
+ */
 bool tomoyo_compare_number_union(const unsigned long value,
 				 const struct tomoyo_number_union *ptr)
 {
-	if (ptr->is_group)
+	if (ptr->group)
 		return tomoyo_number_matches_group(value, value, ptr->group);
 	return value >= ptr->values[0] && value <= ptr->values[1];
 }
 
+/**
+ * tomoyo_add_slash - Add trailing '/' if needed.
+ *
+ * @buf: Pointer to "struct tomoyo_path_info".
+ *
+ * Returns nothing.
+ *
+ * @buf must be generated by tomoyo_encode() because this function does not
+ * allocate memory for adding '/'.
+ */
 static void tomoyo_add_slash(struct tomoyo_path_info *buf)
 {
 	if (buf->is_dir)
@@ -132,24 +138,6 @@
 }
 
 /**
- * tomoyo_strendswith - Check whether the token ends with the given token.
- *
- * @name: The token to check.
- * @tail: The token to find.
- *
- * Returns true if @name ends with @tail, false otherwise.
- */
-static bool tomoyo_strendswith(const char *name, const char *tail)
-{
-	int len;
-
-	if (!name || !tail)
-		return false;
-	len = strlen(name) - strlen(tail);
-	return len >= 0 && !strcmp(name + len, tail);
-}
-
-/**
  * tomoyo_get_realpath - Get realpath.
  *
  * @buf:  Pointer to "struct tomoyo_path_info".
@@ -164,7 +152,7 @@
 		tomoyo_fill_path_info(buf);
 		return true;
 	}
-        return false;
+	return false;
 }
 
 /**
@@ -176,13 +164,9 @@
  */
 static int tomoyo_audit_path_log(struct tomoyo_request_info *r)
 {
-	const char *operation = tomoyo_path_keyword[r->param.path.operation];
-	const struct tomoyo_path_info *filename = r->param.path.filename;
-	if (r->granted)
-		return 0;
-	tomoyo_warn_log(r, "%s %s", operation, filename->name);
-	return tomoyo_supervisor(r, "allow_%s %s\n", operation,
-				 tomoyo_pattern(filename));
+	return tomoyo_supervisor(r, "file %s %s\n", tomoyo_path_keyword
+				 [r->param.path.operation],
+				 r->param.path.filename->name);
 }
 
 /**
@@ -194,16 +178,10 @@
  */
 static int tomoyo_audit_path2_log(struct tomoyo_request_info *r)
 {
-	const char *operation = tomoyo_path2_keyword[r->param.path2.operation];
-	const struct tomoyo_path_info *filename1 = r->param.path2.filename1;
-	const struct tomoyo_path_info *filename2 = r->param.path2.filename2;
-	if (r->granted)
-		return 0;
-	tomoyo_warn_log(r, "%s %s %s", operation, filename1->name,
-			filename2->name);
-	return tomoyo_supervisor(r, "allow_%s %s %s\n", operation,
-				 tomoyo_pattern(filename1),
-				 tomoyo_pattern(filename2));
+	return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords
+				 [tomoyo_pp2mac[r->param.path2.operation]],
+				 r->param.path2.filename1->name,
+				 r->param.path2.filename2->name);
 }
 
 /**
@@ -215,24 +193,18 @@
  */
 static int tomoyo_audit_mkdev_log(struct tomoyo_request_info *r)
 {
-	const char *operation = tomoyo_mkdev_keyword[r->param.mkdev.operation];
-	const struct tomoyo_path_info *filename = r->param.mkdev.filename;
-	const unsigned int major = r->param.mkdev.major;
-	const unsigned int minor = r->param.mkdev.minor;
-	const unsigned int mode = r->param.mkdev.mode;
-	if (r->granted)
-		return 0;
-	tomoyo_warn_log(r, "%s %s 0%o %u %u", operation, filename->name, mode,
-			major, minor);
-	return tomoyo_supervisor(r, "allow_%s %s 0%o %u %u\n", operation,
-				 tomoyo_pattern(filename), mode, major, minor);
+	return tomoyo_supervisor(r, "file %s %s 0%o %u %u\n",
+				 tomoyo_mac_keywords
+				 [tomoyo_pnnn2mac[r->param.mkdev.operation]],
+				 r->param.mkdev.filename->name,
+				 r->param.mkdev.mode, r->param.mkdev.major,
+				 r->param.mkdev.minor);
 }
 
 /**
  * tomoyo_audit_path_number_log - Audit path/number request log.
  *
- * @r:     Pointer to "struct tomoyo_request_info".
- * @error: Error code.
+ * @r: Pointer to "struct tomoyo_request_info".
  *
  * Returns 0 on success, negative value otherwise.
  */
@@ -240,11 +212,7 @@
 {
 	const u8 type = r->param.path_number.operation;
 	u8 radix;
-	const struct tomoyo_path_info *filename = r->param.path_number.filename;
-	const char *operation = tomoyo_path_number_keyword[type];
 	char buffer[64];
-	if (r->granted)
-		return 0;
 	switch (type) {
 	case TOMOYO_TYPE_CREATE:
 	case TOMOYO_TYPE_MKDIR:
@@ -262,251 +230,23 @@
 	}
 	tomoyo_print_ulong(buffer, sizeof(buffer), r->param.path_number.number,
 			   radix);
-	tomoyo_warn_log(r, "%s %s %s", operation, filename->name, buffer);
-	return tomoyo_supervisor(r, "allow_%s %s %s\n", operation,
-				 tomoyo_pattern(filename), buffer);
-}
-
-static bool tomoyo_same_globally_readable(const struct tomoyo_acl_head *a,
-					  const struct tomoyo_acl_head *b)
-{
-	return container_of(a, struct tomoyo_readable_file,
-			    head)->filename ==
-		container_of(b, struct tomoyo_readable_file,
-			     head)->filename;
+	return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords
+				 [tomoyo_pn2mac[type]],
+				 r->param.path_number.filename->name, buffer);
 }
 
 /**
- * tomoyo_update_globally_readable_entry - Update "struct tomoyo_readable_file" list.
+ * tomoyo_check_path_acl - Check permission for path operation.
  *
- * @filename:  Filename unconditionally permitted to open() for reading.
- * @is_delete: True if it is a delete request.
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
  *
- * Returns 0 on success, negative value otherwise.
+ * Returns true if granted, false otherwise.
  *
- * Caller holds tomoyo_read_lock().
+ * To be able to use wildcards for domain transition, this function records
+ * the matched entry on success. Since the caller holds tomoyo_read_lock(),
+ * it is safe to do so.
  */
-static int tomoyo_update_globally_readable_entry(const char *filename,
-						 const bool is_delete)
-{
-	struct tomoyo_readable_file e = { };
-	int error;
-
-	if (!tomoyo_correct_word(filename))
-		return -EINVAL;
-	e.filename = tomoyo_get_name(filename);
-	if (!e.filename)
-		return -ENOMEM;
-	error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-				     &tomoyo_policy_list
-				     [TOMOYO_ID_GLOBALLY_READABLE],
-				     tomoyo_same_globally_readable);
-	tomoyo_put_name(e.filename);
-	return error;
-}
-
-/**
- * tomoyo_globally_readable_file - Check if the file is unconditionnaly permitted to be open()ed for reading.
- *
- * @filename: The filename to check.
- *
- * Returns true if any domain can open @filename for reading, false otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static bool tomoyo_globally_readable_file(const struct tomoyo_path_info *
-					     filename)
-{
-	struct tomoyo_readable_file *ptr;
-	bool found = false;
-
-	list_for_each_entry_rcu(ptr, &tomoyo_policy_list
-				[TOMOYO_ID_GLOBALLY_READABLE], head.list) {
-		if (!ptr->head.is_deleted &&
-		    tomoyo_path_matches_pattern(filename, ptr->filename)) {
-			found = true;
-			break;
-		}
-	}
-	return found;
-}
-
-/**
- * tomoyo_write_globally_readable - Write "struct tomoyo_readable_file" list.
- *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_globally_readable(char *data, const bool is_delete)
-{
-	return tomoyo_update_globally_readable_entry(data, is_delete);
-}
-
-static bool tomoyo_same_pattern(const struct tomoyo_acl_head *a,
-				const struct tomoyo_acl_head *b)
-{
-	return container_of(a, struct tomoyo_no_pattern, head)->pattern ==
-		container_of(b, struct tomoyo_no_pattern, head)->pattern;
-}
-
-/**
- * tomoyo_update_file_pattern_entry - Update "struct tomoyo_no_pattern" list.
- *
- * @pattern:   Pathname pattern.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_file_pattern_entry(const char *pattern,
-					    const bool is_delete)
-{
-	struct tomoyo_no_pattern e = { };
-	int error;
-
-	if (!tomoyo_correct_word(pattern))
-		return -EINVAL;
-	e.pattern = tomoyo_get_name(pattern);
-	if (!e.pattern)
-		return -ENOMEM;
-	error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-				     &tomoyo_policy_list[TOMOYO_ID_PATTERN],
-				     tomoyo_same_pattern);
-	tomoyo_put_name(e.pattern);
-	return error;
-}
-
-/**
- * tomoyo_pattern - Get patterned pathname.
- *
- * @filename: The filename to find patterned pathname.
- *
- * Returns pointer to pathname pattern if matched, @filename otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-const char *tomoyo_pattern(const struct tomoyo_path_info *filename)
-{
-	struct tomoyo_no_pattern *ptr;
-	const struct tomoyo_path_info *pattern = NULL;
-
-	list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_PATTERN],
-				head.list) {
-		if (ptr->head.is_deleted)
-			continue;
-		if (!tomoyo_path_matches_pattern(filename, ptr->pattern))
-			continue;
-		pattern = ptr->pattern;
-		if (tomoyo_strendswith(pattern->name, "/\\*")) {
-			/* Do nothing. Try to find the better match. */
-		} else {
-			/* This would be the better match. Use this. */
-			break;
-		}
-	}
-	if (pattern)
-		filename = pattern;
-	return filename->name;
-}
-
-/**
- * tomoyo_write_pattern - Write "struct tomoyo_no_pattern" list.
- *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_pattern(char *data, const bool is_delete)
-{
-	return tomoyo_update_file_pattern_entry(data, is_delete);
-}
-
-static bool tomoyo_same_no_rewrite(const struct tomoyo_acl_head *a,
-				   const struct tomoyo_acl_head *b)
-{
-	return container_of(a, struct tomoyo_no_rewrite, head)->pattern
-		== container_of(b, struct tomoyo_no_rewrite, head)
-		->pattern;
-}
-
-/**
- * tomoyo_update_no_rewrite_entry - Update "struct tomoyo_no_rewrite" list.
- *
- * @pattern:   Pathname pattern that are not rewritable by default.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_no_rewrite_entry(const char *pattern,
-					  const bool is_delete)
-{
-	struct tomoyo_no_rewrite e = { };
-	int error;
-
-	if (!tomoyo_correct_word(pattern))
-		return -EINVAL;
-	e.pattern = tomoyo_get_name(pattern);
-	if (!e.pattern)
-		return -ENOMEM;
-	error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-				     &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE],
-				     tomoyo_same_no_rewrite);
-	tomoyo_put_name(e.pattern);
-	return error;
-}
-
-/**
- * tomoyo_no_rewrite_file - Check if the given pathname is not permitted to be rewrited.
- *
- * @filename: Filename to check.
- *
- * Returns true if @filename is specified by "deny_rewrite" directive,
- * false otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static bool tomoyo_no_rewrite_file(const struct tomoyo_path_info *filename)
-{
-	struct tomoyo_no_rewrite *ptr;
-	bool found = false;
-
-	list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE],
-				head.list) {
-		if (ptr->head.is_deleted)
-			continue;
-		if (!tomoyo_path_matches_pattern(filename, ptr->pattern))
-			continue;
-		found = true;
-		break;
-	}
-	return found;
-}
-
-/**
- * tomoyo_write_no_rewrite - Write "struct tomoyo_no_rewrite" list.
- *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_no_rewrite(char *data, const bool is_delete)
-{
-	return tomoyo_update_no_rewrite_entry(data, is_delete);
-}
-
 static bool tomoyo_check_path_acl(struct tomoyo_request_info *r,
 				  const struct tomoyo_acl_info *ptr)
 {
@@ -521,6 +261,14 @@
 	return false;
 }
 
+/**
+ * tomoyo_check_path_number_acl - Check permission for path number operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r,
 					 const struct tomoyo_acl_info *ptr)
 {
@@ -533,6 +281,14 @@
 					  &acl->name);
 }
 
+/**
+ * tomoyo_check_path2_acl - Check permission for path path operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r,
 				   const struct tomoyo_acl_info *ptr)
 {
@@ -544,8 +300,16 @@
 					     &acl->name2);
 }
 
+/**
+ * tomoyo_check_mkdev_acl - Check permission for path number number number operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r,
-				const struct tomoyo_acl_info *ptr)
+				   const struct tomoyo_acl_info *ptr)
 {
 	const struct tomoyo_mkdev_acl *acl =
 		container_of(ptr, typeof(*acl), head);
@@ -560,15 +324,31 @@
 					  &acl->name);
 }
 
+/**
+ * tomoyo_same_path_acl - Check for duplicated "struct tomoyo_path_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_path_acl(const struct tomoyo_acl_info *a,
 				 const struct tomoyo_acl_info *b)
 {
 	const struct tomoyo_path_acl *p1 = container_of(a, typeof(*p1), head);
 	const struct tomoyo_path_acl *p2 = container_of(b, typeof(*p2), head);
-	return tomoyo_same_acl_head(&p1->head, &p2->head) &&
-		tomoyo_same_name_union(&p1->name, &p2->name);
+	return tomoyo_same_name_union(&p1->name, &p2->name);
 }
 
+/**
+ * tomoyo_merge_path_acl - Merge duplicated "struct tomoyo_path_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a,
 				  struct tomoyo_acl_info *b,
 				  const bool is_delete)
@@ -577,19 +357,10 @@
 		->perm;
 	u16 perm = *a_perm;
 	const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm;
-	if (is_delete) {
+	if (is_delete)
 		perm &= ~b_perm;
-		if ((perm & TOMOYO_RW_MASK) != TOMOYO_RW_MASK)
-			perm &= ~(1 << TOMOYO_TYPE_READ_WRITE);
-		else if (!(perm & (1 << TOMOYO_TYPE_READ_WRITE)))
-			perm &= ~TOMOYO_RW_MASK;
-	} else {
+	else
 		perm |= b_perm;
-		if ((perm & TOMOYO_RW_MASK) == TOMOYO_RW_MASK)
-			perm |= (1 << TOMOYO_TYPE_READ_WRITE);
-		else if (perm & (1 << TOMOYO_TYPE_READ_WRITE))
-			perm |= TOMOYO_RW_MASK;
-	}
 	*a_perm = perm;
 	return !perm;
 }
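
With the synthesized TOMOYO_TYPE_READ_WRITE bit gone, merging two entries reduces to a plain bitwise OR (grant) or AND-NOT (revoke) on the permission mask, and the return value tells the caller whether the entry became empty and can be removed. A stand-alone user-space sketch of these semantics (names hypothetical, not the kernel API):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Minimal model of tomoyo_merge_path_acl()'s bitmask logic. */
	static bool merge_perm(uint16_t *a_perm, uint16_t b_perm, bool is_delete)
	{
		if (is_delete)
			*a_perm &= ~b_perm;	/* revoke @b's permission bits */
		else
			*a_perm |= b_perm;	/* grant @b's permission bits */
		return !*a_perm;		/* true => entry is empty */
	}

	int main(void)
	{
		uint16_t perm = 1 << 2;			/* say, "read" */
		merge_perm(&perm, 1 << 3, false);	/* add "write" */
		merge_perm(&perm, 1 << 2, true);	/* drop "read" */
		bool empty = merge_perm(&perm, 1 << 3, true);
		printf("perm=%#x empty=%d\n", perm, empty); /* perm=0 empty=1 */
		return 0;
	}
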
@@ -597,52 +368,62 @@
 /**
  * tomoyo_update_path_acl - Update "struct tomoyo_path_acl" list.
  *
- * @type:      Type of operation.
- * @filename:  Filename.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_path_acl(const u8 type, const char *filename,
-				  struct tomoyo_domain_info * const domain,
-				  const bool is_delete)
+static int tomoyo_update_path_acl(const u16 perm,
+				  struct tomoyo_acl_param *param)
 {
 	struct tomoyo_path_acl e = {
 		.head.type = TOMOYO_TYPE_PATH_ACL,
-		.perm = 1 << type
+		.perm = perm
 	};
 	int error;
-	if (e.perm == (1 << TOMOYO_TYPE_READ_WRITE))
-		e.perm |= TOMOYO_RW_MASK;
-	if (!tomoyo_parse_name_union(filename, &e.name))
-		return -EINVAL;
-	error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-				     tomoyo_same_path_acl,
-				     tomoyo_merge_path_acl);
+	if (!tomoyo_parse_name_union(param, &e.name))
+		error = -EINVAL;
+	else
+		error = tomoyo_update_domain(&e.head, sizeof(e), param,
+					     tomoyo_same_path_acl,
+					     tomoyo_merge_path_acl);
 	tomoyo_put_name_union(&e.name);
 	return error;
 }
 
+/**
+ * tomoyo_same_mkdev_acl - Check for duplicated "struct tomoyo_mkdev_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_mkdev_acl(const struct tomoyo_acl_info *a,
 					 const struct tomoyo_acl_info *b)
 {
-	const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1),
-								head);
-	const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2),
-								head);
-	return tomoyo_same_acl_head(&p1->head, &p2->head)
-		&& tomoyo_same_name_union(&p1->name, &p2->name)
-		&& tomoyo_same_number_union(&p1->mode, &p2->mode)
-		&& tomoyo_same_number_union(&p1->major, &p2->major)
-		&& tomoyo_same_number_union(&p1->minor, &p2->minor);
+	const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), head);
+	const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), head);
+	return tomoyo_same_name_union(&p1->name, &p2->name) &&
+		tomoyo_same_number_union(&p1->mode, &p2->mode) &&
+		tomoyo_same_number_union(&p1->major, &p2->major) &&
+		tomoyo_same_number_union(&p1->minor, &p2->minor);
 }
 
+/**
+ * tomoyo_merge_mkdev_acl - Merge duplicated "struct tomoyo_mkdev_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a,
-					  struct tomoyo_acl_info *b,
-					  const bool is_delete)
+				   struct tomoyo_acl_info *b,
+				   const bool is_delete)
 {
 	u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl,
 					 head)->perm;
@@ -660,37 +441,30 @@
 /**
  * tomoyo_update_mkdev_acl - Update "struct tomoyo_mkdev_acl" list.
  *
- * @type:      Type of operation.
- * @filename:  Filename.
- * @mode:      Create mode.
- * @major:     Device major number.
- * @minor:     Device minor number.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_mkdev_acl(const u8 type, const char *filename,
-					  char *mode, char *major, char *minor,
-					  struct tomoyo_domain_info * const
-					  domain, const bool is_delete)
+static int tomoyo_update_mkdev_acl(const u8 perm,
+				   struct tomoyo_acl_param *param)
 {
 	struct tomoyo_mkdev_acl e = {
 		.head.type = TOMOYO_TYPE_MKDEV_ACL,
-		.perm = 1 << type
+		.perm = perm
 	};
-	int error = is_delete ? -ENOENT : -ENOMEM;
-	if (!tomoyo_parse_name_union(filename, &e.name) ||
-	    !tomoyo_parse_number_union(mode, &e.mode) ||
-	    !tomoyo_parse_number_union(major, &e.major) ||
-	    !tomoyo_parse_number_union(minor, &e.minor))
-		goto out;
-	error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-				     tomoyo_same_mkdev_acl,
-				     tomoyo_merge_mkdev_acl);
- out:
+	int error;
+	if (!tomoyo_parse_name_union(param, &e.name) ||
+	    !tomoyo_parse_number_union(param, &e.mode) ||
+	    !tomoyo_parse_number_union(param, &e.major) ||
+	    !tomoyo_parse_number_union(param, &e.minor))
+		error = -EINVAL;
+	else
+		error = tomoyo_update_domain(&e.head, sizeof(e), param,
+					     tomoyo_same_mkdev_acl,
+					     tomoyo_merge_mkdev_acl);
 	tomoyo_put_name_union(&e.name);
 	tomoyo_put_number_union(&e.mode);
 	tomoyo_put_number_union(&e.major);
@@ -698,16 +472,32 @@
 	return error;
 }
 
+/**
+ * tomoyo_same_path2_acl - Check for duplicated "struct tomoyo_path2_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_path2_acl(const struct tomoyo_acl_info *a,
 				  const struct tomoyo_acl_info *b)
 {
 	const struct tomoyo_path2_acl *p1 = container_of(a, typeof(*p1), head);
 	const struct tomoyo_path2_acl *p2 = container_of(b, typeof(*p2), head);
-	return tomoyo_same_acl_head(&p1->head, &p2->head)
-		&& tomoyo_same_name_union(&p1->name1, &p2->name1)
-		&& tomoyo_same_name_union(&p1->name2, &p2->name2);
+	return tomoyo_same_name_union(&p1->name1, &p2->name1) &&
+		tomoyo_same_name_union(&p1->name2, &p2->name2);
 }
 
+/**
+ * tomoyo_merge_path2_acl - Merge duplicated "struct tomoyo_path2_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a,
 				   struct tomoyo_acl_info *b,
 				   const bool is_delete)
@@ -727,33 +517,28 @@
 /**
  * tomoyo_update_path2_acl - Update "struct tomoyo_path2_acl" list.
  *
- * @type:      Type of operation.
- * @filename1: First filename.
- * @filename2: Second filename.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_path2_acl(const u8 type, const char *filename1,
-				   const char *filename2,
-				   struct tomoyo_domain_info * const domain,
-				   const bool is_delete)
+static int tomoyo_update_path2_acl(const u8 perm,
+				   struct tomoyo_acl_param *param)
 {
 	struct tomoyo_path2_acl e = {
 		.head.type = TOMOYO_TYPE_PATH2_ACL,
-		.perm = 1 << type
+		.perm = perm
 	};
-	int error = is_delete ? -ENOENT : -ENOMEM;
-	if (!tomoyo_parse_name_union(filename1, &e.name1) ||
-	    !tomoyo_parse_name_union(filename2, &e.name2))
-		goto out;
-	error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-				     tomoyo_same_path2_acl,
-				     tomoyo_merge_path2_acl);
- out:
+	int error;
+	if (!tomoyo_parse_name_union(param, &e.name1) ||
+	    !tomoyo_parse_name_union(param, &e.name2))
+		error = -EINVAL;
+	else
+		error = tomoyo_update_domain(&e.head, sizeof(e), param,
+					     tomoyo_same_path2_acl,
+					     tomoyo_merge_path2_acl);
 	tomoyo_put_name_union(&e.name1);
 	tomoyo_put_name_union(&e.name2);
 	return error;
@@ -775,9 +560,8 @@
 {
 	int error;
 
- next:
 	r->type = tomoyo_p2mac[operation];
-	r->mode = tomoyo_get_mode(r->profile, r->type);
+	r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type);
 	if (r->mode == TOMOYO_CONFIG_DISABLED)
 		return 0;
 	r->param_type = TOMOYO_TYPE_PATH_ACL;
@@ -785,10 +569,6 @@
 	r->param.path.operation = operation;
 	do {
 		tomoyo_check_acl(r, tomoyo_check_path_acl);
-		if (!r->granted && operation == TOMOYO_TYPE_READ &&
-		    !r->domain->ignore_global_allow_read &&
-		    tomoyo_globally_readable_file(filename))
-			r->granted = true;
 		error = tomoyo_audit_path_log(r);
 		/*
 		 * Do not retry for execute request, for alias may have
@@ -796,19 +576,17 @@
 		 */
 	} while (error == TOMOYO_RETRY_REQUEST &&
 		 operation != TOMOYO_TYPE_EXECUTE);
-	/*
-	 * Since "allow_truncate" doesn't imply "allow_rewrite" permission,
-	 * we need to check "allow_rewrite" permission if the filename is
-	 * specified by "deny_rewrite" keyword.
-	 */
-	if (!error && operation == TOMOYO_TYPE_TRUNCATE &&
-	    tomoyo_no_rewrite_file(filename)) {
-		operation = TOMOYO_TYPE_REWRITE;
-		goto next;
-	}
 	return error;
 }
 
+/**
+ * tomoyo_same_path_number_acl - Check for duplicated "struct tomoyo_path_number_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a,
 					const struct tomoyo_acl_info *b)
 {
@@ -816,11 +594,19 @@
 							       head);
 	const struct tomoyo_path_number_acl *p2 = container_of(b, typeof(*p2),
 							       head);
-	return tomoyo_same_acl_head(&p1->head, &p2->head)
-		&& tomoyo_same_name_union(&p1->name, &p2->name)
-		&& tomoyo_same_number_union(&p1->number, &p2->number);
+	return tomoyo_same_name_union(&p1->name, &p2->name) &&
+		tomoyo_same_number_union(&p1->number, &p2->number);
 }
 
+/**
+ * tomoyo_merge_path_number_acl - Merge duplicated "struct tomoyo_path_number_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a,
 					 struct tomoyo_acl_info *b,
 					 const bool is_delete)
@@ -841,33 +627,26 @@
 /**
  * tomoyo_update_path_number_acl - Update ioctl/chmod/chown/chgrp ACL.
  *
- * @type:      Type of operation.
- * @filename:  Filename.
- * @number:    Number.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_update_path_number_acl(const u8 type, const char *filename,
-					 char *number,
-					 struct tomoyo_domain_info * const
-					 domain,
-					 const bool is_delete)
+static int tomoyo_update_path_number_acl(const u8 perm,
+					 struct tomoyo_acl_param *param)
 {
 	struct tomoyo_path_number_acl e = {
 		.head.type = TOMOYO_TYPE_PATH_NUMBER_ACL,
-		.perm = 1 << type
+		.perm = perm
 	};
-	int error = is_delete ? -ENOENT : -ENOMEM;
-	if (!tomoyo_parse_name_union(filename, &e.name))
-		return -EINVAL;
-	if (!tomoyo_parse_number_union(number, &e.number))
-		goto out;
-	error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-				     tomoyo_same_path_number_acl,
-				     tomoyo_merge_path_number_acl);
- out:
+	int error;
+	if (!tomoyo_parse_name_union(param, &e.name) ||
+	    !tomoyo_parse_number_union(param, &e.number))
+		error = -EINVAL;
+	else
+		error = tomoyo_update_domain(&e.head, sizeof(e), param,
+					     tomoyo_same_path_number_acl,
+					     tomoyo_merge_path_number_acl);
 	tomoyo_put_name_union(&e.name);
 	tomoyo_put_number_union(&e.number);
 	return error;
@@ -886,16 +665,20 @@
 			    unsigned long number)
 {
 	struct tomoyo_request_info r;
+	struct tomoyo_obj_info obj = {
+		.path1 = *path,
+	};
 	int error = -ENOMEM;
 	struct tomoyo_path_info buf;
 	int idx;
 
 	if (tomoyo_init_request_info(&r, NULL, tomoyo_pn2mac[type])
-	    == TOMOYO_CONFIG_DISABLED || !path->mnt || !path->dentry)
+	    == TOMOYO_CONFIG_DISABLED || !path->dentry)
 		return 0;
 	idx = tomoyo_read_lock();
 	if (!tomoyo_get_realpath(&buf, path))
 		goto out;
+	r.obj = &obj;
 	if (type == TOMOYO_TYPE_MKDIR)
 		tomoyo_add_slash(&buf);
 	r.param_type = TOMOYO_TYPE_PATH_NUMBER_ACL;
@@ -930,45 +713,30 @@
 	int error = 0;
 	struct tomoyo_path_info buf;
 	struct tomoyo_request_info r;
+	struct tomoyo_obj_info obj = {
+		.path1 = *path,
+	};
 	int idx;
 
-	if (!path->mnt ||
-	    (path->dentry->d_inode && S_ISDIR(path->dentry->d_inode->i_mode)))
-		return 0;
 	buf.name = NULL;
 	r.mode = TOMOYO_CONFIG_DISABLED;
 	idx = tomoyo_read_lock();
-	/*
-	 * If the filename is specified by "deny_rewrite" keyword,
-	 * we need to check "allow_rewrite" permission when the filename is not
-	 * opened for append mode or the filename is truncated at open time.
-	 */
-	if ((acc_mode & MAY_WRITE) && !(flag & O_APPEND)
-	    && tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_REWRITE)
+	if (acc_mode &&
+	    tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN)
 	    != TOMOYO_CONFIG_DISABLED) {
 		if (!tomoyo_get_realpath(&buf, path)) {
 			error = -ENOMEM;
 			goto out;
 		}
-		if (tomoyo_no_rewrite_file(&buf))
-			error = tomoyo_path_permission(&r, TOMOYO_TYPE_REWRITE,
+		r.obj = &obj;
+		if (acc_mode & MAY_READ)
+			error = tomoyo_path_permission(&r, TOMOYO_TYPE_READ,
 						       &buf);
-	}
-	if (!error && acc_mode &&
-	    tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN)
-	    != TOMOYO_CONFIG_DISABLED) {
-		u8 operation;
-		if (!buf.name && !tomoyo_get_realpath(&buf, path)) {
-			error = -ENOMEM;
-			goto out;
-		}
-		if (acc_mode == (MAY_READ | MAY_WRITE))
-			operation = TOMOYO_TYPE_READ_WRITE;
-		else if (acc_mode == MAY_READ)
-			operation = TOMOYO_TYPE_READ;
-		else
-			operation = TOMOYO_TYPE_WRITE;
-		error = tomoyo_path_permission(&r, operation, &buf);
+		if (!error && (acc_mode & MAY_WRITE))
+			error = tomoyo_path_permission(&r, (flag & O_APPEND) ?
+						       TOMOYO_TYPE_APPEND :
+						       TOMOYO_TYPE_WRITE,
+						       &buf);
 	}
  out:
 	kfree(buf.name);
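
The rewritten open check above derives at most two ACL lookups directly from acc_mode: MAY_READ maps to TOMOYO_TYPE_READ, and MAY_WRITE maps to TOMOYO_TYPE_APPEND when O_APPEND is set or TOMOYO_TYPE_WRITE otherwise, replacing both the old READ/WRITE/READ_WRITE three-way split and the separate REWRITE pass. A user-space sketch of the mapping (the MAY_* values mirror include/linux/fs.h; everything else is illustrative):

	#include <fcntl.h>
	#include <stdio.h>

	#define MAY_WRITE 2	/* as in include/linux/fs.h */
	#define MAY_READ  4

	/* Which TOMOYO checks would an open() with these modes trigger? */
	static void show_checks(int acc_mode, int flag)
	{
		if (acc_mode & MAY_READ)
			printf("check TOMOYO_TYPE_READ\n");
		if (acc_mode & MAY_WRITE)
			printf("check TOMOYO_TYPE_%s\n",
			       (flag & O_APPEND) ? "APPEND" : "WRITE");
	}

	int main(void)
	{
		show_checks(MAY_READ | MAY_WRITE, 0);	/* READ, then WRITE */
		show_checks(MAY_WRITE, O_APPEND);	/* APPEND only */
		return 0;
	}
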
@@ -979,46 +747,57 @@
 }
 
 /**
- * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "rewrite", "chroot" and "unmount".
+ * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "append", "chroot" and "unmount".
  *
  * @operation: Type of operation.
  * @path:      Pointer to "struct path".
+ * @target:    Symlink's target if @operation is TOMOYO_TYPE_SYMLINK,
+ *             NULL otherwise.
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_path_perm(const u8 operation, struct path *path)
+int tomoyo_path_perm(const u8 operation, struct path *path, const char *target)
 {
-	int error = -ENOMEM;
-	struct tomoyo_path_info buf;
 	struct tomoyo_request_info r;
+	struct tomoyo_obj_info obj = {
+		.path1 = *path,
+	};
+	int error;
+	struct tomoyo_path_info buf;
+	bool is_enforce;
+	struct tomoyo_path_info symlink_target;
 	int idx;
 
-	if (!path->mnt)
-		return 0;
 	if (tomoyo_init_request_info(&r, NULL, tomoyo_p2mac[operation])
 	    == TOMOYO_CONFIG_DISABLED)
 		return 0;
+	is_enforce = (r.mode == TOMOYO_CONFIG_ENFORCING);
+	error = -ENOMEM;
 	buf.name = NULL;
 	idx = tomoyo_read_lock();
 	if (!tomoyo_get_realpath(&buf, path))
 		goto out;
+	r.obj = &obj;
 	switch (operation) {
-	case TOMOYO_TYPE_REWRITE:
-		if (!tomoyo_no_rewrite_file(&buf)) {
-			error = 0;
-			goto out;
-		}
-		break;
 	case TOMOYO_TYPE_RMDIR:
 	case TOMOYO_TYPE_CHROOT:
 		tomoyo_add_slash(&buf);
 		break;
+	case TOMOYO_TYPE_SYMLINK:
+		symlink_target.name = tomoyo_encode(target);
+		if (!symlink_target.name)
+			goto out;
+		tomoyo_fill_path_info(&symlink_target);
+		obj.symlink_target = &symlink_target;
+		break;
 	}
 	error = tomoyo_path_permission(&r, operation, &buf);
+	if (operation == TOMOYO_TYPE_SYMLINK)
+		kfree(symlink_target.name);
  out:
 	kfree(buf.name);
 	tomoyo_read_unlock(idx);
-	if (r.mode != TOMOYO_CONFIG_ENFORCING)
+	if (!is_enforce)
 		error = 0;
 	return error;
 }
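
The new @target parameter lets the symlink check match on the link body in addition to the link path; callers pass the target only for TOMOYO_TYPE_SYMLINK. Hypothetical call sites (the real callers are tomoyo's LSM hooks in security/tomoyo/tomoyo.c):

	/* Illustrative only. */
	tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, path, "/etc/passwd"); /* link body */
	tomoyo_path_perm(TOMOYO_TYPE_UNLINK, path, NULL);           /* no target */
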
@@ -1034,20 +813,23 @@
  * Returns 0 on success, negative value otherwise.
  */
 int tomoyo_mkdev_perm(const u8 operation, struct path *path,
-			     const unsigned int mode, unsigned int dev)
+		      const unsigned int mode, unsigned int dev)
 {
 	struct tomoyo_request_info r;
+	struct tomoyo_obj_info obj = {
+		.path1 = *path,
+	};
 	int error = -ENOMEM;
 	struct tomoyo_path_info buf;
 	int idx;
 
-	if (!path->mnt ||
-	    tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation])
+	if (tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation])
 	    == TOMOYO_CONFIG_DISABLED)
 		return 0;
 	idx = tomoyo_read_lock();
 	error = -ENOMEM;
 	if (tomoyo_get_realpath(&buf, path)) {
+		r.obj = &obj;
 		dev = new_decode_dev(dev);
 		r.param_type = TOMOYO_TYPE_MKDEV_ACL;
 		r.param.mkdev.filename = &buf;
@@ -1081,10 +863,13 @@
 	struct tomoyo_path_info buf1;
 	struct tomoyo_path_info buf2;
 	struct tomoyo_request_info r;
+	struct tomoyo_obj_info obj = {
+		.path1 = *path1,
+		.path2 = *path2,
+	};
 	int idx;
 
-	if (!path1->mnt || !path2->mnt ||
-	    tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation])
+	if (tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation])
 	    == TOMOYO_CONFIG_DISABLED)
 		return 0;
 	buf1.name = NULL;
@@ -1096,16 +881,17 @@
 	switch (operation) {
 		struct dentry *dentry;
 	case TOMOYO_TYPE_RENAME:
-        case TOMOYO_TYPE_LINK:
+	case TOMOYO_TYPE_LINK:
 		dentry = path1->dentry;
-	        if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
-                        break;
-                /* fall through */
-        case TOMOYO_TYPE_PIVOT_ROOT:
-                tomoyo_add_slash(&buf1);
-                tomoyo_add_slash(&buf2);
+		if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
+			break;
+		/* fall through */
+	case TOMOYO_TYPE_PIVOT_ROOT:
+		tomoyo_add_slash(&buf1);
+		tomoyo_add_slash(&buf2);
 		break;
-        }
+	}
+	r.obj = &obj;
 	r.param_type = TOMOYO_TYPE_PATH2_ACL;
 	r.param.path2.operation = operation;
 	r.param.path2.filename1 = &buf1;
@@ -1124,53 +910,91 @@
 }
 
 /**
- * tomoyo_write_file - Update file related list.
+ * tomoyo_same_mount_acl - Check for duplicated "struct tomoyo_mount_acl" entry.
  *
- * @data:      String to parse.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a,
+				  const struct tomoyo_acl_info *b)
+{
+	const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head);
+	const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head);
+	return tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) &&
+		tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) &&
+		tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) &&
+		tomoyo_same_number_union(&p1->flags, &p2->flags);
+}
+
+/**
+ * tomoyo_update_mount_acl - Write "struct tomoyo_mount_acl" list.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain,
-		      const bool is_delete)
+static int tomoyo_update_mount_acl(struct tomoyo_acl_param *param)
 {
-	char *w[5];
+	struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL };
+	int error;
+	if (!tomoyo_parse_name_union(param, &e.dev_name) ||
+	    !tomoyo_parse_name_union(param, &e.dir_name) ||
+	    !tomoyo_parse_name_union(param, &e.fs_type) ||
+	    !tomoyo_parse_number_union(param, &e.flags))
+		error = -EINVAL;
+	else
+		error = tomoyo_update_domain(&e.head, sizeof(e), param,
+					     tomoyo_same_mount_acl, NULL);
+	tomoyo_put_name_union(&e.dev_name);
+	tomoyo_put_name_union(&e.dir_name);
+	tomoyo_put_name_union(&e.fs_type);
+	tomoyo_put_number_union(&e.flags);
+	return error;
+}
+
+/**
+ * tomoyo_write_file - Update file related list.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+int tomoyo_write_file(struct tomoyo_acl_param *param)
+{
+	u16 perm = 0;
 	u8 type;
-	if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0])
-		return -EINVAL;
-	if (strncmp(w[0], "allow_", 6))
-		goto out;
-	w[0] += 6;
-	for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++) {
-		if (strcmp(w[0], tomoyo_path_keyword[type]))
-			continue;
-		return tomoyo_update_path_acl(type, w[1], domain, is_delete);
-	}
-	if (!w[2][0])
-		goto out;
-	for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++) {
-		if (strcmp(w[0], tomoyo_path2_keyword[type]))
-			continue;
-		return tomoyo_update_path2_acl(type, w[1], w[2], domain,
-					       is_delete);
-	}
-	for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++) {
-		if (strcmp(w[0], tomoyo_path_number_keyword[type]))
-			continue;
-		return tomoyo_update_path_number_acl(type, w[1], w[2], domain,
-						     is_delete);
-	}
-	if (!w[3][0] || !w[4][0])
-		goto out;
-	for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++) {
-		if (strcmp(w[0], tomoyo_mkdev_keyword[type]))
-			continue;
-		return tomoyo_update_mkdev_acl(type, w[1], w[2], w[3],
-					       w[4], domain, is_delete);
-	}
- out:
+	const char *operation = tomoyo_read_token(param);
+	for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++)
+		if (tomoyo_permstr(operation, tomoyo_path_keyword[type]))
+			perm |= 1 << type;
+	if (perm)
+		return tomoyo_update_path_acl(perm, param);
+	for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++)
+		if (tomoyo_permstr(operation,
+				   tomoyo_mac_keywords[tomoyo_pp2mac[type]]))
+			perm |= 1 << type;
+	if (perm)
+		return tomoyo_update_path2_acl(perm, param);
+	for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++)
+		if (tomoyo_permstr(operation,
+				   tomoyo_mac_keywords[tomoyo_pn2mac[type]]))
+			perm |= 1 << type;
+	if (perm)
+		return tomoyo_update_path_number_acl(perm, param);
+	for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++)
+		if (tomoyo_permstr(operation,
+				   tomoyo_mac_keywords[tomoyo_pnnn2mac[type]]))
+			perm |= 1 << type;
+	if (perm)
+		return tomoyo_update_mkdev_acl(perm, param);
+	if (tomoyo_permstr(operation,
+			   tomoyo_mac_keywords[TOMOYO_MAC_FILE_MOUNT]))
+		return tomoyo_update_mount_acl(param);
 	return -EINVAL;
 }
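
After this rewrite a single domain-policy line can carry several operations at once: tomoyo_permstr() tests each keyword against the '/'-separated operation token, matching bits are OR'ed into perm, and one update call installs them all. Assuming the TOMOYO 2.4 policy syntax, where the leading "file " keyword has already been consumed before tomoyo_write_file() runs, lines like the following (patterns illustrative) would each result in exactly one tomoyo_update_*_acl() call:

	file read/write/append /var/log/messages
	file link/rename /home/\*/\*
	file mkchar /dev/null 0666 1 3
	file mount /dev/sda1 /mnt/ ext3 0x0
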
diff --git a/security/tomoyo/gc.c b/security/tomoyo/gc.c
index a877e4c..ae135fb 100644
--- a/security/tomoyo/gc.c
+++ b/security/tomoyo/gc.c
@@ -1,58 +1,205 @@
 /*
  * security/tomoyo/gc.c
  *
- * Implementation of the Domain-Based Mandatory Access Control.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
- *
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
 #include <linux/kthread.h>
 #include <linux/slab.h>
 
+/* The list for "struct tomoyo_io_buffer". */
+static LIST_HEAD(tomoyo_io_buffer_list);
+/* Lock for protecting tomoyo_io_buffer_list. */
+static DEFINE_SPINLOCK(tomoyo_io_buffer_list_lock);
+
+/* Size of an element. */
+static const u8 tomoyo_element_size[TOMOYO_MAX_POLICY] = {
+	[TOMOYO_ID_GROUP] = sizeof(struct tomoyo_group),
+	[TOMOYO_ID_PATH_GROUP] = sizeof(struct tomoyo_path_group),
+	[TOMOYO_ID_NUMBER_GROUP] = sizeof(struct tomoyo_number_group),
+	[TOMOYO_ID_AGGREGATOR] = sizeof(struct tomoyo_aggregator),
+	[TOMOYO_ID_TRANSITION_CONTROL] =
+	sizeof(struct tomoyo_transition_control),
+	[TOMOYO_ID_MANAGER] = sizeof(struct tomoyo_manager),
+	/* [TOMOYO_ID_CONDITION] = "struct tomoyo_condition"->size, */
+	/* [TOMOYO_ID_NAME] = "struct tomoyo_name"->size, */
+	/* [TOMOYO_ID_ACL] =
+	   tomoyo_acl_size["struct tomoyo_acl_info"->type], */
+	[TOMOYO_ID_DOMAIN] = sizeof(struct tomoyo_domain_info),
+};
+
+/* Size of a domain ACL element. */
+static const u8 tomoyo_acl_size[] = {
+	[TOMOYO_TYPE_PATH_ACL] = sizeof(struct tomoyo_path_acl),
+	[TOMOYO_TYPE_PATH2_ACL] = sizeof(struct tomoyo_path2_acl),
+	[TOMOYO_TYPE_PATH_NUMBER_ACL] = sizeof(struct tomoyo_path_number_acl),
+	[TOMOYO_TYPE_MKDEV_ACL] = sizeof(struct tomoyo_mkdev_acl),
+	[TOMOYO_TYPE_MOUNT_ACL] = sizeof(struct tomoyo_mount_acl),
+};
+
+/**
+ * tomoyo_struct_used_by_io_buffer - Check whether the list element is used by /sys/kernel/security/tomoyo/ users or not.
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true if @element is used by /sys/kernel/security/tomoyo/ users,
+ * false otherwise.
+ */
+static bool tomoyo_struct_used_by_io_buffer(const struct list_head *element)
+{
+	struct tomoyo_io_buffer *head;
+	bool in_use = false;
+
+	spin_lock(&tomoyo_io_buffer_list_lock);
+	list_for_each_entry(head, &tomoyo_io_buffer_list, list) {
+		head->users++;
+		spin_unlock(&tomoyo_io_buffer_list_lock);
+		if (mutex_lock_interruptible(&head->io_sem)) {
+			in_use = true;
+			goto out;
+		}
+		if (head->r.domain == element || head->r.group == element ||
+		    head->r.acl == element || &head->w.domain->list == element)
+			in_use = true;
+		mutex_unlock(&head->io_sem);
+out:
+		spin_lock(&tomoyo_io_buffer_list_lock);
+		head->users--;
+		if (in_use)
+			break;
+	}
+	spin_unlock(&tomoyo_io_buffer_list_lock);
+	return in_use;
+}
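
The users counter is what allows the scan to drop the spinlock and then block on io_sem, something a spinlock holder must never do directly: the pinned entry cannot be freed while users is raised. The shape of the pattern, reduced to its essentials (illustrative, not the kernel API):

	spin_lock(&list_lock);
	list_for_each_entry(e, &list, list) {
		e->users++;			/* pin: keeps e alive */
		spin_unlock(&list_lock);	/* sleeping is now allowed */
		mutex_lock(&e->io_sem);		/* inspect e at leisure */
		mutex_unlock(&e->io_sem);
		spin_lock(&list_lock);		/* retake before advancing */
		e->users--;			/* unpin */
	}
	spin_unlock(&list_lock);
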
+
+/**
+ * tomoyo_name_used_by_io_buffer - Check whether the string is used by /sys/kernel/security/tomoyo/ users or not.
+ *
+ * @string: String to check.
+ * @size:   Memory allocated for @string .
+ *
+ * Returns true if @string is used by /sys/kernel/security/tomoyo/ users,
+ * false otherwise.
+ */
+static bool tomoyo_name_used_by_io_buffer(const char *string,
+					  const size_t size)
+{
+	struct tomoyo_io_buffer *head;
+	bool in_use = false;
+
+	spin_lock(&tomoyo_io_buffer_list_lock);
+	list_for_each_entry(head, &tomoyo_io_buffer_list, list) {
+		int i;
+		head->users++;
+		spin_unlock(&tomoyo_io_buffer_list_lock);
+		if (mutex_lock_interruptible(&head->io_sem)) {
+			in_use = true;
+			goto out;
+		}
+		for (i = 0; i < TOMOYO_MAX_IO_READ_QUEUE; i++) {
+			const char *w = head->r.w[i];
+			if (w < string || w > string + size)
+				continue;
+			in_use = true;
+			break;
+		}
+		mutex_unlock(&head->io_sem);
+out:
+		spin_lock(&tomoyo_io_buffer_list_lock);
+		head->users--;
+		if (in_use)
+			break;
+	}
+	spin_unlock(&tomoyo_io_buffer_list_lock);
+	return in_use;
+}
+
+/* Structure for garbage collection. */
 struct tomoyo_gc {
 	struct list_head list;
-	int type;
+	enum tomoyo_policy_id type;
+	size_t size;
 	struct list_head *element;
 };
-static LIST_HEAD(tomoyo_gc_queue);
-static DEFINE_MUTEX(tomoyo_gc_mutex);
+/* List of entries to be deleted. */
+static LIST_HEAD(tomoyo_gc_list);
+/* Length of tomoyo_gc_list. */
+static int tomoyo_gc_list_len;
 
-/* Caller holds tomoyo_policy_lock mutex. */
+/**
+ * tomoyo_add_to_gc - Add an entry to the to-be-deleted list.
+ *
+ * @type:    One of values in "enum tomoyo_policy_id".
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true on success, false otherwise.
+ *
+ * Caller holds tomoyo_policy_lock mutex.
+ *
+ * Adding an entry needs kmalloc(). Thus, if we try to add thousands of
+ * entries at once, it will take too long. Therefore, do not add more than 128
+ * entries per scan. But to be able to handle the worst case where all entries
+ * are in use, we accept one more entry per scan.
+ *
+ * If we used a singly linked list built on "struct list_head"->prev (which is
+ * LIST_POISON2), we could avoid the kmalloc().
+ */
 static bool tomoyo_add_to_gc(const int type, struct list_head *element)
 {
 	struct tomoyo_gc *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry)
 		return false;
 	entry->type = type;
+	if (type == TOMOYO_ID_ACL)
+		entry->size = tomoyo_acl_size[
+			      container_of(element,
+					   typeof(struct tomoyo_acl_info),
+					   list)->type];
+	else if (type == TOMOYO_ID_NAME)
+		entry->size = strlen(container_of(element,
+						  typeof(struct tomoyo_name),
+						  head.list)->entry.name) + 1;
+	else if (type == TOMOYO_ID_CONDITION)
+		entry->size =
+			container_of(element, typeof(struct tomoyo_condition),
+				     head.list)->size;
+	else
+		entry->size = tomoyo_element_size[type];
 	entry->element = element;
-	list_add(&entry->list, &tomoyo_gc_queue);
+	list_add(&entry->list, &tomoyo_gc_list);
 	list_del_rcu(element);
-	return true;
+	return tomoyo_gc_list_len++ < 128;
 }
 
-static void tomoyo_del_allow_read(struct list_head *element)
+/**
+ * tomoyo_element_linked_by_gc - Validate next element of an entry.
+ *
+ * @element: Pointer to an element.
+ * @size:    Size of @element in byte.
+ *
+ * Returns true if @element is linked by other elements in the garbage
+ * collector's queue, false otherwise.
+ */
+static bool tomoyo_element_linked_by_gc(const u8 *element, const size_t size)
 {
-	struct tomoyo_readable_file *ptr =
-		container_of(element, typeof(*ptr), head.list);
-	tomoyo_put_name(ptr->filename);
+	struct tomoyo_gc *p;
+	list_for_each_entry(p, &tomoyo_gc_list, list) {
+		const u8 *ptr = (const u8 *) p->element->next;
+		if (ptr < element || element + size < ptr)
+			continue;
+		return true;
+	}
+	return false;
 }
 
-static void tomoyo_del_file_pattern(struct list_head *element)
-{
-	struct tomoyo_no_pattern *ptr =
-		container_of(element, typeof(*ptr), head.list);
-	tomoyo_put_name(ptr->pattern);
-}
-
-static void tomoyo_del_no_rewrite(struct list_head *element)
-{
-	struct tomoyo_no_rewrite *ptr =
-		container_of(element, typeof(*ptr), head.list);
-	tomoyo_put_name(ptr->pattern);
-}
-
+/**
+ * tomoyo_del_transition_control - Delete members in "struct tomoyo_transition_control".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_transition_control(struct list_head *element)
 {
 	struct tomoyo_transition_control *ptr =
@@ -61,6 +208,13 @@
 	tomoyo_put_name(ptr->program);
 }
 
+/**
+ * tomoyo_del_aggregator - Delete members in "struct tomoyo_aggregator".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_aggregator(struct list_head *element)
 {
 	struct tomoyo_aggregator *ptr =
@@ -69,6 +223,13 @@
 	tomoyo_put_name(ptr->aggregated_name);
 }
 
+/**
+ * tomoyo_del_manager - Delete members in "struct tomoyo_manager".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_manager(struct list_head *element)
 {
 	struct tomoyo_manager *ptr =
@@ -76,10 +237,18 @@
 	tomoyo_put_name(ptr->manager);
 }
 
+/**
+ * tomoyo_del_acl - Delete members in "struct tomoyo_acl_info".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_acl(struct list_head *element)
 {
 	struct tomoyo_acl_info *acl =
 		container_of(element, typeof(*acl), list);
+	tomoyo_put_condition(acl->cond);
 	switch (acl->type) {
 	case TOMOYO_TYPE_PATH_ACL:
 		{
@@ -127,6 +296,13 @@
 	}
 }
 
+/**
+ * tomoyo_del_domain - Delete members in "struct tomoyo_domain_info".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true if deleted, false otherwise.
+ */
 static bool tomoyo_del_domain(struct list_head *element)
 {
 	struct tomoyo_domain_info *domain =
@@ -165,13 +341,65 @@
 	return true;
 }
 
+/**
+ * tomoyo_del_condition - Delete members in "struct tomoyo_condition".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
+void tomoyo_del_condition(struct list_head *element)
+{
+	struct tomoyo_condition *cond = container_of(element, typeof(*cond),
+						     head.list);
+	const u16 condc = cond->condc;
+	const u16 numbers_count = cond->numbers_count;
+	const u16 names_count = cond->names_count;
+	const u16 argc = cond->argc;
+	const u16 envc = cond->envc;
+	unsigned int i;
+	const struct tomoyo_condition_element *condp
+		= (const struct tomoyo_condition_element *) (cond + 1);
+	struct tomoyo_number_union *numbers_p
+		= (struct tomoyo_number_union *) (condp + condc);
+	struct tomoyo_name_union *names_p
+		= (struct tomoyo_name_union *) (numbers_p + numbers_count);
+	const struct tomoyo_argv *argv
+		= (const struct tomoyo_argv *) (names_p + names_count);
+	const struct tomoyo_envp *envp
+		= (const struct tomoyo_envp *) (argv + argc);
+	for (i = 0; i < numbers_count; i++)
+		tomoyo_put_number_union(numbers_p++);
+	for (i = 0; i < names_count; i++)
+		tomoyo_put_name_union(names_p++);
+	for (i = 0; i < argc; argv++, i++)
+		tomoyo_put_name(argv->value);
+	for (i = 0; i < envc; envp++, i++) {
+		tomoyo_put_name(envp->name);
+		tomoyo_put_name(envp->value);
+	}
+}
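
tomoyo_del_condition() walks a single allocation laid out as a fixed header followed by five variable-length arrays, locating each array by pointer arithmetic from the end of the previous one. A stand-alone sketch of that layout technique (types and counts illustrative):

	#include <stdlib.h>

	struct header { unsigned short n_a, n_b; };
	struct a { int x; };
	struct b { int y; };

	int main(void)
	{
		/* One block: header, then n_a "a" items, then n_b "b" items. */
		struct header *h = calloc(1, sizeof(*h) + 2 * sizeof(struct a) +
					  3 * sizeof(struct b));
		if (!h)
			return 1;
		h->n_a = 2;
		h->n_b = 3;
		struct a *ap = (struct a *)(h + 1);	  /* just past header */
		struct b *bp = (struct b *)(ap + h->n_a); /* past the "a"s */
		ap[0].x = 1;
		bp[2].y = 9;
		free(h);	/* one free() releases all of the arrays */
		return 0;
	}
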
 
+/**
+ * tomoyo_del_name - Delete members in "struct tomoyo_name".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_name(struct list_head *element)
 {
 	const struct tomoyo_name *ptr =
-		container_of(element, typeof(*ptr), list);
+		container_of(element, typeof(*ptr), head.list);
 }
 
+/**
+ * tomoyo_del_path_group - Delete members in "struct tomoyo_path_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_path_group(struct list_head *element)
 {
 	struct tomoyo_path_group *member =
@@ -179,20 +407,43 @@
 	tomoyo_put_name(member->member_name);
 }
 
+/**
+ * tomoyo_del_group - Delete "struct tomoyo_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_group(struct list_head *element)
 {
 	struct tomoyo_group *group =
-		container_of(element, typeof(*group), list);
+		container_of(element, typeof(*group), head.list);
 	tomoyo_put_name(group->group_name);
 }
 
+/**
+ * tomoyo_del_number_group - Delete members in "struct tomoyo_number_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_number_group(struct list_head *element)
 {
 	struct tomoyo_number_group *member =
 		container_of(element, typeof(*member), head.list);
 }
 
-static bool tomoyo_collect_member(struct list_head *member_list, int id)
+/**
+ * tomoyo_collect_member - Delete elements with "struct tomoyo_acl_head".
+ *
+ * @id:          One of values in "enum tomoyo_policy_id".
+ * @member_list: Pointer to "struct list_head".
+ *
+ * Returns true if some elements are deleted, false otherwise.
+ */
+static bool tomoyo_collect_member(const enum tomoyo_policy_id id,
+				  struct list_head *member_list)
 {
 	struct tomoyo_acl_head *member;
 	list_for_each_entry(member, member_list, list) {
@@ -201,13 +452,20 @@
 		if (!tomoyo_add_to_gc(id, &member->list))
 			return false;
 	}
-        return true;
+	return true;
 }
 
-static bool tomoyo_collect_acl(struct tomoyo_domain_info *domain)
+/**
+ * tomoyo_collect_acl - Delete elements in "struct tomoyo_domain_info".
+ *
+ * @list: Pointer to "struct list_head".
+ *
+ * Returns true if some elements are deleted, false otherwise.
+ */
+static bool tomoyo_collect_acl(struct list_head *list)
 {
 	struct tomoyo_acl_info *acl;
-	list_for_each_entry(acl, &domain->acl_info_list, list) {
+	list_for_each_entry(acl, list, list) {
 		if (!acl->is_deleted)
 			continue;
 		if (!tomoyo_add_to_gc(TOMOYO_ID_ACL, &acl->list))
@@ -216,19 +474,24 @@
 	return true;
 }
 
+/**
+ * tomoyo_collect_entry - Scan lists for deleted elements.
+ *
+ * Returns nothing.
+ */
 static void tomoyo_collect_entry(void)
 {
 	int i;
+	enum tomoyo_policy_id id;
+	struct tomoyo_policy_namespace *ns;
+	int idx;
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		return;
-	for (i = 0; i < TOMOYO_MAX_POLICY; i++) {
-		if (!tomoyo_collect_member(&tomoyo_policy_list[i], i))
-			goto unlock;
-	}
+	idx = tomoyo_read_lock();
 	{
 		struct tomoyo_domain_info *domain;
 		list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
-			if (!tomoyo_collect_acl(domain))
+			if (!tomoyo_collect_acl(&domain->acl_info_list))
 				goto unlock;
 			if (!domain->is_deleted || atomic_read(&domain->users))
 				continue;
@@ -241,48 +504,93 @@
 				goto unlock;
 		}
 	}
-	for (i = 0; i < TOMOYO_MAX_HASH; i++) {
-		struct tomoyo_name *ptr;
-		list_for_each_entry_rcu(ptr, &tomoyo_name_list[i], list) {
+	list_for_each_entry_rcu(ns, &tomoyo_namespace_list, namespace_list) {
+		for (id = 0; id < TOMOYO_MAX_POLICY; id++)
+			if (!tomoyo_collect_member(id, &ns->policy_list[id]))
+				goto unlock;
+		for (i = 0; i < TOMOYO_MAX_ACL_GROUPS; i++)
+			if (!tomoyo_collect_acl(&ns->acl_group[i]))
+				goto unlock;
+		for (i = 0; i < TOMOYO_MAX_GROUP; i++) {
+			struct list_head *list = &ns->group_list[i];
+			struct tomoyo_group *group;
+			switch (i) {
+			case 0:
+				id = TOMOYO_ID_PATH_GROUP;
+				break;
+			default:
+				id = TOMOYO_ID_NUMBER_GROUP;
+				break;
+			}
+			list_for_each_entry(group, list, head.list) {
+				if (!tomoyo_collect_member
+				    (id, &group->member_list))
+					goto unlock;
+				if (!list_empty(&group->member_list) ||
+				    atomic_read(&group->head.users))
+					continue;
+				if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP,
+						      &group->head.list))
+					goto unlock;
+			}
+		}
+	}
+	id = TOMOYO_ID_CONDITION;
+	for (i = 0; i < TOMOYO_MAX_HASH + 1; i++) {
+		struct list_head *list = !i ?
+			&tomoyo_condition_list : &tomoyo_name_list[i - 1];
+		struct tomoyo_shared_acl_head *ptr;
+		list_for_each_entry(ptr, list, list) {
 			if (atomic_read(&ptr->users))
 				continue;
-			if (!tomoyo_add_to_gc(TOMOYO_ID_NAME, &ptr->list))
+			if (!tomoyo_add_to_gc(id, &ptr->list))
 				goto unlock;
 		}
+		id = TOMOYO_ID_NAME;
 	}
-	for (i = 0; i < TOMOYO_MAX_GROUP; i++) {
-		struct list_head *list = &tomoyo_group_list[i];
-		int id;
-		struct tomoyo_group *group;
-		switch (i) {
-		case 0:
-			id = TOMOYO_ID_PATH_GROUP;
-			break;
-		default:
-			id = TOMOYO_ID_NUMBER_GROUP;
-			break;
-		}
-		list_for_each_entry(group, list, list) {
-			if (!tomoyo_collect_member(&group->member_list, id))
-				goto unlock;
-			if (!list_empty(&group->member_list) ||
-			    atomic_read(&group->users))
-				continue;
-			if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP, &group->list))
-				goto unlock;
-		}
-	}
- unlock:
+unlock:
+	tomoyo_read_unlock(idx);
 	mutex_unlock(&tomoyo_policy_lock);
 }
 
-static void tomoyo_kfree_entry(void)
+/**
+ * tomoyo_kfree_entry - Delete entries in tomoyo_gc_list.
+ *
+ * Returns true if some entries were kfree()d, false otherwise.
+ */
+static bool tomoyo_kfree_entry(void)
 {
 	struct tomoyo_gc *p;
 	struct tomoyo_gc *tmp;
+	bool result = false;
 
-	list_for_each_entry_safe(p, tmp, &tomoyo_gc_queue, list) {
+	list_for_each_entry_safe(p, tmp, &tomoyo_gc_list, list) {
 		struct list_head *element = p->element;
+
+		/*
+		 * list_del_rcu() in tomoyo_add_to_gc() guarantees that the
+		 * list element is no longer reachable from the list which
+		 * the element was originally on (e.g. tomoyo_domain_list).
+		 * Also, synchronize_srcu() in tomoyo_gc_thread() guarantees
+		 * that the list element is no longer referenced by syscall
+		 * users.
+		 *
+		 * However, there are three users which may still be using the
+		 * list element. We need to defer until all of these users
+		 * forget the list element.
+		 *
+		 * Firstly, defer until "struct tomoyo_io_buffer"->r.{domain,
+		 * group,acl} and "struct tomoyo_io_buffer"->w.domain forget
+		 * the list element.
+		 */
+		if (tomoyo_struct_used_by_io_buffer(element))
+			continue;
+		/*
+		 * Secondly, defer until all other elements in the
+		 * tomoyo_gc_list list forget the list element.
+		 */
+		if (tomoyo_element_linked_by_gc((const u8 *) element, p->size))
+			continue;
 		switch (p->type) {
 		case TOMOYO_ID_TRANSITION_CONTROL:
 			tomoyo_del_transition_control(element);
@@ -290,19 +598,21 @@
 		case TOMOYO_ID_AGGREGATOR:
 			tomoyo_del_aggregator(element);
 			break;
-		case TOMOYO_ID_GLOBALLY_READABLE:
-			tomoyo_del_allow_read(element);
-			break;
-		case TOMOYO_ID_PATTERN:
-			tomoyo_del_file_pattern(element);
-			break;
-		case TOMOYO_ID_NO_REWRITE:
-			tomoyo_del_no_rewrite(element);
-			break;
 		case TOMOYO_ID_MANAGER:
 			tomoyo_del_manager(element);
 			break;
+		case TOMOYO_ID_CONDITION:
+			tomoyo_del_condition(element);
+			break;
 		case TOMOYO_ID_NAME:
+			/*
+			 * Thirdly, defer until all "struct tomoyo_io_buffer"
+			 * ->r.w[] forget the list element.
+			 */
+			if (tomoyo_name_used_by_io_buffer(
+			    container_of(element, typeof(struct tomoyo_name),
+					 head.list)->entry.name, p->size))
+				continue;
 			tomoyo_del_name(element);
 			break;
 		case TOMOYO_ID_ACL:
@@ -321,34 +631,95 @@
 		case TOMOYO_ID_NUMBER_GROUP:
 			tomoyo_del_number_group(element);
 			break;
+		case TOMOYO_MAX_POLICY:
+			break;
 		}
 		tomoyo_memory_free(element);
 		list_del(&p->list);
 		kfree(p);
+		tomoyo_gc_list_len--;
+		result = true;
 	}
+	return result;
 }
 
+/**
+ * tomoyo_gc_thread - Garbage collector thread function.
+ *
+ * @unused: Unused.
+ *
+ * In case the OOM killer chooses this thread for termination, we create it as
+ * a short-lived thread that is spawned whenever a /sys/kernel/security/tomoyo/
+ * interface file is close()d.
+ *
+ * Returns 0.
+ */
 static int tomoyo_gc_thread(void *unused)
 {
+	/* Garbage collector thread is exclusive. */
+	static DEFINE_MUTEX(tomoyo_gc_mutex);
+	if (!mutex_trylock(&tomoyo_gc_mutex))
+		goto out;
 	daemonize("GC for TOMOYO");
-	if (mutex_trylock(&tomoyo_gc_mutex)) {
-		int i;
-		for (i = 0; i < 10; i++) {
-			tomoyo_collect_entry();
-			if (list_empty(&tomoyo_gc_queue))
-				break;
-			synchronize_srcu(&tomoyo_ss);
-			tomoyo_kfree_entry();
+	do {
+		tomoyo_collect_entry();
+		if (list_empty(&tomoyo_gc_list))
+			break;
+		synchronize_srcu(&tomoyo_ss);
+	} while (tomoyo_kfree_entry());
+	{
+		struct tomoyo_io_buffer *head;
+		struct tomoyo_io_buffer *tmp;
+
+		spin_lock(&tomoyo_io_buffer_list_lock);
+		list_for_each_entry_safe(head, tmp, &tomoyo_io_buffer_list,
+					 list) {
+			if (head->users)
+				continue;
+			list_del(&head->list);
+			kfree(head->read_buf);
+			kfree(head->write_buf);
+			kfree(head);
 		}
-		mutex_unlock(&tomoyo_gc_mutex);
+		spin_unlock(&tomoyo_io_buffer_list_lock);
 	}
-	do_exit(0);
+	mutex_unlock(&tomoyo_gc_mutex);
+out:
+	/* This acts as do_exit(0). */
+	return 0;
 }
 
-void tomoyo_run_gc(void)
+/**
+ * tomoyo_notify_gc - Register/unregister /sys/kernel/security/tomoyo/ users.
+ *
+ * @head:        Pointer to "struct tomoyo_io_buffer".
+ * @is_register: True if register, false if unregister.
+ *
+ * Returns nothing.
+ */
+void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register)
 {
-	struct task_struct *task = kthread_create(tomoyo_gc_thread, NULL,
-						  "GC for TOMOYO");
-	if (!IS_ERR(task))
-		wake_up_process(task);
+	bool is_write = false;
+
+	spin_lock(&tomoyo_io_buffer_list_lock);
+	if (is_register) {
+		head->users = 1;
+		list_add(&head->list, &tomoyo_io_buffer_list);
+	} else {
+		is_write = head->write_buf != NULL;
+		if (!--head->users) {
+			list_del(&head->list);
+			kfree(head->read_buf);
+			kfree(head->write_buf);
+			kfree(head);
+		}
+	}
+	spin_unlock(&tomoyo_io_buffer_list_lock);
+	if (is_write) {
+		struct task_struct *task = kthread_create(tomoyo_gc_thread,
+							  NULL,
+							  "GC for TOMOYO");
+		if (!IS_ERR(task))
+			wake_up_process(task);
+	}
 }
diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c
index e94352c..5fb0e12 100644
--- a/security/tomoyo/group.c
+++ b/security/tomoyo/group.c
@@ -1,21 +1,37 @@
 /*
  * security/tomoyo/group.c
  *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/slab.h>
 #include "common.h"
 
+/**
+ * tomoyo_same_path_group - Check for duplicated "struct tomoyo_path_group" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_path_group(const struct tomoyo_acl_head *a,
-				const struct tomoyo_acl_head *b)
+				   const struct tomoyo_acl_head *b)
 {
 	return container_of(a, struct tomoyo_path_group, head)->member_name ==
 		container_of(b, struct tomoyo_path_group, head)->member_name;
 }
 
+/**
+ * tomoyo_same_number_group - Check for duplicated "struct tomoyo_number_group" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_number_group(const struct tomoyo_acl_head *a,
-				  const struct tomoyo_acl_head *b)
+				     const struct tomoyo_acl_head *b)
 {
 	return !memcmp(&container_of(a, struct tomoyo_number_group, head)
 		       ->number,
@@ -28,48 +44,41 @@
 /**
  * tomoyo_write_group - Write "struct tomoyo_path_group"/"struct tomoyo_number_group" list.
  *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- * @type:      Type of this group.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @type:  Type of this group.
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_write_group(char *data, const bool is_delete, const u8 type)
+int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type)
 {
-	struct tomoyo_group *group;
-	struct list_head *member;
-	char *w[2];
+	struct tomoyo_group *group = tomoyo_get_group(param, type);
 	int error = -EINVAL;
-	if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0])
-		return -EINVAL;
-	group = tomoyo_get_group(w[0], type);
 	if (!group)
 		return -ENOMEM;
-	member = &group->member_list;
+	param->list = &group->member_list;
 	if (type == TOMOYO_PATH_GROUP) {
 		struct tomoyo_path_group e = { };
-		e.member_name = tomoyo_get_name(w[1]);
+		e.member_name = tomoyo_get_name(tomoyo_read_token(param));
 		if (!e.member_name) {
 			error = -ENOMEM;
 			goto out;
 		}
-		error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-					     member, tomoyo_same_path_group);
+		error = tomoyo_update_policy(&e.head, sizeof(e), param,
+					  tomoyo_same_path_group);
 		tomoyo_put_name(e.member_name);
 	} else if (type == TOMOYO_NUMBER_GROUP) {
 		struct tomoyo_number_group e = { };
-		if (w[1][0] == '@'
-		    || !tomoyo_parse_number_union(w[1], &e.number)
-		    || e.number.values[0] > e.number.values[1])
+		if (param->data[0] == '@' ||
+		    !tomoyo_parse_number_union(param, &e.number))
 			goto out;
-		error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-					     member, tomoyo_same_number_group);
+		error = tomoyo_update_policy(&e.head, sizeof(e), param,
+					  tomoyo_same_number_group);
 		/*
 		 * tomoyo_put_number_union() is not needed because
-		 * w[1][0] != '@'.
+		 * param->data[0] != '@'.
 		 */
 	}
- out:
+out:
 	tomoyo_put_group(group);
 	return error;
 }
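
With the tomoyo_acl_param plumbing, exception-policy lines such as the following reach tomoyo_write_group() with the group keyword already consumed (syntax per TOMOYO 2.4; patterns illustrative). Note that a number_group member may not itself begin with '@', i.e. groups cannot nest:

	path_group LOGFILES /var/log/\*
	number_group LOW_PORTS 0-1023
	file read @LOGFILES
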
@@ -77,8 +86,8 @@
 /**
  * tomoyo_path_matches_group - Check whether the given pathname matches members of the given pathname group.
  *
- * @pathname:        The name of pathname.
- * @group:           Pointer to "struct tomoyo_path_group".
+ * @pathname: The name of pathname.
+ * @group:    Pointer to "struct tomoyo_path_group".
  *
  * Returns matched member's pathname if @pathname matches pathnames in @group,
  * NULL otherwise.
diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c
index 3312e56..6797540 100644
--- a/security/tomoyo/load_policy.c
+++ b/security/tomoyo/load_policy.c
@@ -1,15 +1,32 @@
 /*
  * security/tomoyo/load_policy.c
  *
- * Policy loader launcher for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
 
-/* path to policy loader */
-static const char *tomoyo_loader = "/sbin/tomoyo-init";
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+
+/*
+ * Path to the policy loader. (default = CONFIG_SECURITY_TOMOYO_POLICY_LOADER)
+ */
+static const char *tomoyo_loader;
+
+/**
+ * tomoyo_loader_setup - Set policy loader.
+ *
+ * @str: Program to use as a policy loader (e.g. /sbin/tomoyo-init ).
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_loader_setup(char *str)
+{
+	tomoyo_loader = str;
+	return 0;
+}
+
+__setup("TOMOYO_loader=", tomoyo_loader_setup);
 
 /**
  * tomoyo_policy_loader_exists - Check whether /sbin/tomoyo-init exists.
@@ -18,24 +35,38 @@
  */
 static bool tomoyo_policy_loader_exists(void)
 {
-	/*
-	 * Don't activate MAC if the policy loader doesn't exist.
-	 * If the initrd includes /sbin/init but real-root-dev has not
-	 * mounted on / yet, activating MAC will block the system since
-	 * policies are not loaded yet.
-	 * Thus, let do_execve() call this function every time.
-	 */
 	struct path path;
-
+	if (!tomoyo_loader)
+		tomoyo_loader = CONFIG_SECURITY_TOMOYO_POLICY_LOADER;
 	if (kern_path(tomoyo_loader, LOOKUP_FOLLOW, &path)) {
-		printk(KERN_INFO "Not activating Mandatory Access Control now "
-		       "since %s doesn't exist.\n", tomoyo_loader);
+		printk(KERN_INFO "Not activating Mandatory Access Control "
+		       "as %s does not exist.\n", tomoyo_loader);
 		return false;
 	}
 	path_put(&path);
 	return true;
 }
 
+/*
+ * Path to the trigger. (default = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER)
+ */
+static const char *tomoyo_trigger;
+
+/**
+ * tomoyo_trigger_setup - Set trigger for activation.
+ *
+ * @str: Program to use as an activation trigger (e.g. /sbin/init ).
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_trigger_setup(char *str)
+{
+	tomoyo_trigger = str;
+	return 0;
+}
+
+__setup("TOMOYO_trigger=", tomoyo_trigger_setup);
+
 /**
  * tomoyo_load_policy - Run external policy loader to load policy.
  *
@@ -51,24 +82,19 @@
  */
 void tomoyo_load_policy(const char *filename)
 {
+	static bool done;
 	char *argv[2];
 	char *envp[3];
 
-	if (tomoyo_policy_loaded)
+	if (tomoyo_policy_loaded || done)
 		return;
-	/*
-	 * Check filename is /sbin/init or /sbin/tomoyo-start.
-	 * /sbin/tomoyo-start is a dummy filename in case where /sbin/init can't
-	 * be passed.
-	 * You can create /sbin/tomoyo-start by
-	 * "ln -s /bin/true /sbin/tomoyo-start".
-	 */
-	if (strcmp(filename, "/sbin/init") &&
-	    strcmp(filename, "/sbin/tomoyo-start"))
+	if (!tomoyo_trigger)
+		tomoyo_trigger = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER;
+	if (strcmp(filename, tomoyo_trigger))
 		return;
 	if (!tomoyo_policy_loader_exists())
 		return;
-
+	done = true;
 	printk(KERN_INFO "Calling %s to load policy. Please wait.\n",
 	       tomoyo_loader);
 	argv[0] = (char *) tomoyo_loader;
@@ -79,3 +105,5 @@
 	call_usermodehelper(argv[0], argv, envp, 1);
 	tomoyo_check_profile();
 }
+
+#endif
diff --git a/security/tomoyo/memory.c b/security/tomoyo/memory.c
index 42a7b1b..7a56051 100644
--- a/security/tomoyo/memory.c
+++ b/security/tomoyo/memory.c
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/memory.c
  *
- * Memory management functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/hash.h>
@@ -29,10 +27,12 @@
 		panic("MAC Initialization failed.\n");
 }
 
-/* Memory allocated for policy. */
-static atomic_t tomoyo_policy_memory_size;
-/* Quota for holding policy. */
-static unsigned int tomoyo_quota_for_policy;
+/* Lock for protecting tomoyo_memory_used. */
+static DEFINE_SPINLOCK(tomoyo_policy_memory_lock);
+/* Memory currently used by policy/audit log/query. */
+unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
+/* Memory quota for "policy"/"audit log"/"query". */
+unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
 
 /**
  * tomoyo_memory_ok - Check memory quota.
@@ -45,15 +45,20 @@
  */
 bool tomoyo_memory_ok(void *ptr)
 {
-	size_t s = ptr ? ksize(ptr) : 0;
-	atomic_add(s, &tomoyo_policy_memory_size);
-	if (ptr && (!tomoyo_quota_for_policy ||
-		    atomic_read(&tomoyo_policy_memory_size)
-		    <= tomoyo_quota_for_policy)) {
-		memset(ptr, 0, s);
-		return true;
+	if (ptr) {
+		const size_t s = ksize(ptr);
+		bool result;
+		spin_lock(&tomoyo_policy_memory_lock);
+		tomoyo_memory_used[TOMOYO_MEMORY_POLICY] += s;
+		result = !tomoyo_memory_quota[TOMOYO_MEMORY_POLICY] ||
+			tomoyo_memory_used[TOMOYO_MEMORY_POLICY] <=
+			tomoyo_memory_quota[TOMOYO_MEMORY_POLICY];
+		if (!result)
+			tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s;
+		spin_unlock(&tomoyo_policy_memory_lock);
+		if (result)
+			return true;
 	}
-	atomic_sub(s, &tomoyo_policy_memory_size);
 	tomoyo_warn_oom(__func__);
 	return false;
 }
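
The accounting charges first under the spinlock and rolls back if the quota would be exceeded, so concurrent allocators can never jointly overshoot. The pattern in isolation (user-space sketch, names illustrative; the kernel version holds tomoyo_policy_memory_lock around the same steps):

	#include <stdbool.h>
	#include <stddef.h>

	static unsigned int used, quota;	/* quota == 0 means unlimited */

	static bool charge(size_t s)
	{
		used += s;		/* charge optimistically... */
		if (quota && used > quota) {
			used -= s;	/* ...roll back on overshoot */
			return false;
		}
		return true;
	}

	int main(void)
	{
		quota = 100;
		bool ok1 = charge(60);	/* true:  used = 60 */
		bool ok2 = charge(50);	/* false: 110 > 100, used stays 60 */
		return !(ok1 && !ok2);
	}
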
@@ -86,22 +91,28 @@
  */
 void tomoyo_memory_free(void *ptr)
 {
-	atomic_sub(ksize(ptr), &tomoyo_policy_memory_size);
+	size_t s = ksize(ptr);
+	spin_lock(&tomoyo_policy_memory_lock);
+	tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s;
+	spin_unlock(&tomoyo_policy_memory_lock);
 	kfree(ptr);
 }
 
 /**
  * tomoyo_get_group - Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group".
  *
- * @group_name: The name of address group.
- * @idx:        Index number.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @idx:   Index number.
  *
  * Returns pointer to "struct tomoyo_group" on success, NULL otherwise.
  */
-struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx)
+struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param,
+				      const u8 idx)
 {
 	struct tomoyo_group e = { };
 	struct tomoyo_group *group = NULL;
+	struct list_head *list;
+	const char *group_name = tomoyo_read_token(param);
 	bool found = false;
 	if (!tomoyo_correct_word(group_name) || idx >= TOMOYO_MAX_GROUP)
 		return NULL;
@@ -110,10 +121,11 @@
 		return NULL;
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		goto out;
-	list_for_each_entry(group, &tomoyo_group_list[idx], list) {
+	list = &param->ns->group_list[idx];
+	list_for_each_entry(group, list, head.list) {
 		if (e.group_name != group->group_name)
 			continue;
-		atomic_inc(&group->users);
+		atomic_inc(&group->head.users);
 		found = true;
 		break;
 	}
@@ -121,15 +133,14 @@
 		struct tomoyo_group *entry = tomoyo_commit_ok(&e, sizeof(e));
 		if (entry) {
 			INIT_LIST_HEAD(&entry->member_list);
-			atomic_set(&entry->users, 1);
-			list_add_tail_rcu(&entry->list,
-					  &tomoyo_group_list[idx]);
+			atomic_set(&entry->head.users, 1);
+			list_add_tail_rcu(&entry->head.list, list);
 			group = entry;
 			found = true;
 		}
 	}
 	mutex_unlock(&tomoyo_policy_lock);
- out:
+out:
 	tomoyo_put_name(e.group_name);
 	return found ? group : NULL;
 }
@@ -154,7 +165,6 @@
 	struct tomoyo_name *ptr;
 	unsigned int hash;
 	int len;
-	int allocated_len;
 	struct list_head *head;
 
 	if (!name)
@@ -164,120 +174,43 @@
 	head = &tomoyo_name_list[hash_long(hash, TOMOYO_HASH_BITS)];
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		return NULL;
-	list_for_each_entry(ptr, head, list) {
+	list_for_each_entry(ptr, head, head.list) {
 		if (hash != ptr->entry.hash || strcmp(name, ptr->entry.name))
 			continue;
-		atomic_inc(&ptr->users);
+		atomic_inc(&ptr->head.users);
 		goto out;
 	}
 	ptr = kzalloc(sizeof(*ptr) + len, GFP_NOFS);
-	allocated_len = ptr ? ksize(ptr) : 0;
-	if (!ptr || (tomoyo_quota_for_policy &&
-		     atomic_read(&tomoyo_policy_memory_size) + allocated_len
-		     > tomoyo_quota_for_policy)) {
+	if (tomoyo_memory_ok(ptr)) {
+		ptr->entry.name = ((char *) ptr) + sizeof(*ptr);
+		memmove((char *) ptr->entry.name, name, len);
+		atomic_set(&ptr->head.users, 1);
+		tomoyo_fill_path_info(&ptr->entry);
+		list_add_tail(&ptr->head.list, head);
+	} else {
 		kfree(ptr);
 		ptr = NULL;
-		tomoyo_warn_oom(__func__);
-		goto out;
 	}
-	atomic_add(allocated_len, &tomoyo_policy_memory_size);
-	ptr->entry.name = ((char *) ptr) + sizeof(*ptr);
-	memmove((char *) ptr->entry.name, name, len);
-	atomic_set(&ptr->users, 1);
-	tomoyo_fill_path_info(&ptr->entry);
-	list_add_tail(&ptr->list, head);
- out:
+out:
 	mutex_unlock(&tomoyo_policy_lock);
 	return ptr ? &ptr->entry : NULL;
 }
 
+/* Initial namespace. */
+struct tomoyo_policy_namespace tomoyo_kernel_namespace;
+
 /**
  * tomoyo_mm_init - Initialize mm related code.
  */
 void __init tomoyo_mm_init(void)
 {
 	int idx;
-
-	for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++)
-		INIT_LIST_HEAD(&tomoyo_policy_list[idx]);
-	for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++)
-		INIT_LIST_HEAD(&tomoyo_group_list[idx]);
 	for (idx = 0; idx < TOMOYO_MAX_HASH; idx++)
 		INIT_LIST_HEAD(&tomoyo_name_list[idx]);
+	tomoyo_kernel_namespace.name = "<kernel>";
+	tomoyo_init_policy_namespace(&tomoyo_kernel_namespace);
+	tomoyo_kernel_domain.ns = &tomoyo_kernel_namespace;
 	INIT_LIST_HEAD(&tomoyo_kernel_domain.acl_info_list);
-	tomoyo_kernel_domain.domainname = tomoyo_get_name(TOMOYO_ROOT_NAME);
+	tomoyo_kernel_domain.domainname = tomoyo_get_name("<kernel>");
 	list_add_tail_rcu(&tomoyo_kernel_domain.list, &tomoyo_domain_list);
-	idx = tomoyo_read_lock();
-	if (tomoyo_find_domain(TOMOYO_ROOT_NAME) != &tomoyo_kernel_domain)
-		panic("Can't register tomoyo_kernel_domain");
-	{
-		/* Load built-in policy. */
-		tomoyo_write_transition_control("/sbin/hotplug", false,
-					TOMOYO_TRANSITION_CONTROL_INITIALIZE);
-		tomoyo_write_transition_control("/sbin/modprobe", false,
-					TOMOYO_TRANSITION_CONTROL_INITIALIZE);
-	}
-	tomoyo_read_unlock(idx);
-}
-
-
-/* Memory allocated for query lists. */
-unsigned int tomoyo_query_memory_size;
-/* Quota for holding query lists. */
-unsigned int tomoyo_quota_for_query;
-
-/**
- * tomoyo_read_memory_counter - Check for memory usage in bytes.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns memory usage.
- */
-void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head)
-{
-	if (!head->r.eof) {
-		const unsigned int policy
-			= atomic_read(&tomoyo_policy_memory_size);
-		const unsigned int query = tomoyo_query_memory_size;
-		char buffer[64];
-
-		memset(buffer, 0, sizeof(buffer));
-		if (tomoyo_quota_for_policy)
-			snprintf(buffer, sizeof(buffer) - 1,
-				 "   (Quota: %10u)",
-				 tomoyo_quota_for_policy);
-		else
-			buffer[0] = '\0';
-		tomoyo_io_printf(head, "Policy:       %10u%s\n", policy,
-				 buffer);
-		if (tomoyo_quota_for_query)
-			snprintf(buffer, sizeof(buffer) - 1,
-				 "   (Quota: %10u)",
-				 tomoyo_quota_for_query);
-		else
-			buffer[0] = '\0';
-		tomoyo_io_printf(head, "Query lists:  %10u%s\n", query,
-				 buffer);
-		tomoyo_io_printf(head, "Total:        %10u\n", policy + query);
-		head->r.eof = true;
-	}
-}
-
-/**
- * tomoyo_write_memory_quota - Set memory quota.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns 0.
- */
-int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head)
-{
-	char *data = head->write_buf;
-	unsigned int size;
-
-	if (sscanf(data, "Policy: %u", &size) == 1)
-		tomoyo_quota_for_policy = size;
-	else if (sscanf(data, "Query lists: %u", &size) == 1)
-		tomoyo_quota_for_query = size;
-	return 0;
 }
diff --git a/security/tomoyo/mount.c b/security/tomoyo/mount.c
index 9fc2e15..bee09d0 100644
--- a/security/tomoyo/mount.c
+++ b/security/tomoyo/mount.c
@@ -1,28 +1,22 @@
 /*
  * security/tomoyo/mount.c
  *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/slab.h>
 #include "common.h"
 
-/* Keywords for mount restrictions. */
-
-/* Allow to call 'mount --bind /source_dir /dest_dir' */
-#define TOMOYO_MOUNT_BIND_KEYWORD                        "--bind"
-/* Allow to call 'mount --move /old_dir    /new_dir ' */
-#define TOMOYO_MOUNT_MOVE_KEYWORD                        "--move"
-/* Allow to call 'mount -o remount /dir             ' */
-#define TOMOYO_MOUNT_REMOUNT_KEYWORD                     "--remount"
-/* Allow to call 'mount --make-unbindable /dir'       */
-#define TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD             "--make-unbindable"
-/* Allow to call 'mount --make-private /dir'          */
-#define TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD                "--make-private"
-/* Allow to call 'mount --make-slave /dir'            */
-#define TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD                  "--make-slave"
-/* Allow to call 'mount --make-shared /dir'           */
-#define TOMOYO_MOUNT_MAKE_SHARED_KEYWORD                 "--make-shared"
+/* String table for special mount operations. */
+static const char * const tomoyo_mounts[TOMOYO_MAX_SPECIAL_MOUNT] = {
+	[TOMOYO_MOUNT_BIND]            = "--bind",
+	[TOMOYO_MOUNT_MOVE]            = "--move",
+	[TOMOYO_MOUNT_REMOUNT]         = "--remount",
+	[TOMOYO_MOUNT_MAKE_UNBINDABLE] = "--make-unbindable",
+	[TOMOYO_MOUNT_MAKE_PRIVATE]    = "--make-private",
+	[TOMOYO_MOUNT_MAKE_SLAVE]      = "--make-slave",
+	[TOMOYO_MOUNT_MAKE_SHARED]     = "--make-shared",
+};
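
Replacing the per-keyword macros with this table means tomoyo_mount_permission() hands out pointers into the table itself, so tomoyo_mount_acl() further down can recognize special operations by pointer identity instead of strcmp(). A small standalone illustration of that identity trick; the two-entry enum and the harness are simplified stand-ins:

#include <stdio.h>

enum { MOUNT_BIND, MOUNT_MOVE, MAX_SPECIAL_MOUNT };

static const char * const mounts[MAX_SPECIAL_MOUNT] = {
	[MOUNT_BIND] = "--bind",
	[MOUNT_MOVE] = "--move",
};

static void check(const char *type)
{
	/* Pointer compare: true only when `type` came from the table. */
	if (type == mounts[MOUNT_BIND])
		puts("special: bind mount");
	else
		puts("ordinary fs type string");
}

int main(void)
{
	char user_supplied[] = "--bind";	/* distinct storage */

	check(mounts[MOUNT_BIND]);	/* special: bind mount */
	check(user_supplied);		/* ordinary fs type string */
	return 0;
}
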
 
 /**
  * tomoyo_audit_mount_log - Audit mount log.
@@ -33,50 +27,42 @@
  */
 static int tomoyo_audit_mount_log(struct tomoyo_request_info *r)
 {
-	const char *dev = r->param.mount.dev->name;
-	const char *dir = r->param.mount.dir->name;
-	const char *type = r->param.mount.type->name;
-	const unsigned long flags = r->param.mount.flags;
-	if (r->granted)
-		return 0;
-	if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD))
-		tomoyo_warn_log(r, "mount -o remount %s 0x%lX", dir, flags);
-	else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD)
-		 || !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD))
-		tomoyo_warn_log(r, "mount %s %s %s 0x%lX", type, dev, dir,
-				flags);
-	else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) ||
-		 !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) ||
-		 !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) ||
-		 !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD))
-		tomoyo_warn_log(r, "mount %s %s 0x%lX", type, dir, flags);
-	else
-		tomoyo_warn_log(r, "mount -t %s %s %s 0x%lX", type, dev, dir,
-				flags);
-	return tomoyo_supervisor(r,
-				 TOMOYO_KEYWORD_ALLOW_MOUNT "%s %s %s 0x%lX\n",
-				 tomoyo_pattern(r->param.mount.dev),
-				 tomoyo_pattern(r->param.mount.dir), type,
-				 flags);
+	return tomoyo_supervisor(r, "file mount %s %s %s 0x%lX\n",
+				 r->param.mount.dev->name,
+				 r->param.mount.dir->name,
+				 r->param.mount.type->name,
+				 r->param.mount.flags);
 }
 
+/**
+ * tomoyo_check_mount_acl - Check permission for mount operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_mount_acl(struct tomoyo_request_info *r,
 				   const struct tomoyo_acl_info *ptr)
 {
 	const struct tomoyo_mount_acl *acl =
 		container_of(ptr, typeof(*acl), head);
-	return tomoyo_compare_number_union(r->param.mount.flags, &acl->flags) &&
-		tomoyo_compare_name_union(r->param.mount.type, &acl->fs_type) &&
-		tomoyo_compare_name_union(r->param.mount.dir, &acl->dir_name) &&
+	return tomoyo_compare_number_union(r->param.mount.flags,
+					   &acl->flags) &&
+		tomoyo_compare_name_union(r->param.mount.type,
+					  &acl->fs_type) &&
+		tomoyo_compare_name_union(r->param.mount.dir,
+					  &acl->dir_name) &&
 		(!r->param.mount.need_dev ||
-		 tomoyo_compare_name_union(r->param.mount.dev, &acl->dev_name));
+		 tomoyo_compare_name_union(r->param.mount.dev,
+					   &acl->dev_name));
 }
 
 /**
  * tomoyo_mount_acl - Check permission for mount() operation.
  *
  * @r:        Pointer to "struct tomoyo_request_info".
- * @dev_name: Name of device file.
+ * @dev_name: Name of device file. May be NULL.
  * @dir:      Pointer to "struct path".
  * @type:     Name of filesystem type.
  * @flags:    Mount options.
@@ -86,8 +72,10 @@
  * Caller holds tomoyo_read_lock().
  */
 static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
-			    struct path *dir, char *type, unsigned long flags)
+			    struct path *dir, const char *type,
+			    unsigned long flags)
 {
+	struct tomoyo_obj_info obj = { };
 	struct path path;
 	struct file_system_type *fstype = NULL;
 	const char *requested_type = NULL;
@@ -98,6 +86,7 @@
 	struct tomoyo_path_info rdir;
 	int need_dev = 0;
 	int error = -ENOMEM;
+	r->obj = &obj;
 
 	/* Get fstype. */
 	requested_type = tomoyo_encode(type);
@@ -107,6 +96,7 @@
 	tomoyo_fill_path_info(&rtype);
 
 	/* Get mount point. */
+	obj.path2 = *dir;
 	requested_dir_name = tomoyo_realpath_from_path(dir);
 	if (!requested_dir_name) {
 		error = -ENOMEM;
@@ -116,15 +106,15 @@
 	tomoyo_fill_path_info(&rdir);
 
 	/* Compare fs name. */
-	if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD)) {
+	if (type == tomoyo_mounts[TOMOYO_MOUNT_REMOUNT]) {
 		/* dev_name is ignored. */
-	} else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) ||
-		   !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) ||
-		   !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) ||
-		   !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD)) {
+	} else if (type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE] ||
+		   type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE] ||
+		   type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE] ||
+		   type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED]) {
 		/* dev_name is ignored. */
-	} else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD) ||
-		   !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD)) {
+	} else if (type == tomoyo_mounts[TOMOYO_MOUNT_BIND] ||
+		   type == tomoyo_mounts[TOMOYO_MOUNT_MOVE]) {
 		need_dev = -1; /* dev_name is a directory */
 	} else {
 		fstype = get_fs_type(type);
@@ -142,8 +132,8 @@
 			error = -ENOENT;
 			goto out;
 		}
+		obj.path1 = path;
 		requested_dev_name = tomoyo_realpath_from_path(&path);
-		path_put(&path);
 		if (!requested_dev_name) {
 			error = -ENOENT;
 			goto out;
@@ -176,22 +166,26 @@
 	if (fstype)
 		put_filesystem(fstype);
 	kfree(requested_type);
+	/* Drop refcount obtained by kern_path(). */
+	if (obj.path1.dentry)
+		path_put(&obj.path1);
 	return error;
 }
 
 /**
  * tomoyo_mount_permission - Check permission for mount() operation.
  *
- * @dev_name:  Name of device file.
+ * @dev_name:  Name of device file. May be NULL.
  * @path:      Pointer to "struct path".
 * @type:      Name of filesystem type. May be NULL.
  * @flags:     Mount options.
 * @data_page: Optional data. May be NULL.
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
-			    unsigned long flags, void *data_page)
+int tomoyo_mount_permission(char *dev_name, struct path *path,
+			    const char *type, unsigned long flags,
+			    void *data_page)
 {
 	struct tomoyo_request_info r;
 	int error;
@@ -203,31 +197,31 @@
 	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
 		flags &= ~MS_MGC_MSK;
 	if (flags & MS_REMOUNT) {
-		type = TOMOYO_MOUNT_REMOUNT_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_REMOUNT];
 		flags &= ~MS_REMOUNT;
 	}
 	if (flags & MS_MOVE) {
-		type = TOMOYO_MOUNT_MOVE_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_MOVE];
 		flags &= ~MS_MOVE;
 	}
 	if (flags & MS_BIND) {
-		type = TOMOYO_MOUNT_BIND_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_BIND];
 		flags &= ~MS_BIND;
 	}
 	if (flags & MS_UNBINDABLE) {
-		type = TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE];
 		flags &= ~MS_UNBINDABLE;
 	}
 	if (flags & MS_PRIVATE) {
-		type = TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE];
 		flags &= ~MS_PRIVATE;
 	}
 	if (flags & MS_SLAVE) {
-		type = TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE];
 		flags &= ~MS_SLAVE;
 	}
 	if (flags & MS_SHARED) {
-		type = TOMOYO_MOUNT_MAKE_SHARED_KEYWORD;
+		type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED];
 		flags &= ~MS_SHARED;
 	}
 	if (!type)
@@ -237,49 +231,3 @@
 	tomoyo_read_unlock(idx);
 	return error;
 }
-
-static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a,
-				  const struct tomoyo_acl_info *b)
-{
-	const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head);
-	const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head);
-	return tomoyo_same_acl_head(&p1->head, &p2->head) &&
-		tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) &&
-		tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) &&
-		tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) &&
-		tomoyo_same_number_union(&p1->flags, &p2->flags);
-}
-
-/**
- * tomoyo_write_mount - Write "struct tomoyo_mount_acl" list.
- *
- * @data:      String to parse.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain,
-		       const bool is_delete)
-{
-	struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL };
-	int error = is_delete ? -ENOENT : -ENOMEM;
-	char *w[4];
-	if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[3][0])
-		return -EINVAL;
-	if (!tomoyo_parse_name_union(w[0], &e.dev_name) ||
-	    !tomoyo_parse_name_union(w[1], &e.dir_name) ||
-	    !tomoyo_parse_name_union(w[2], &e.fs_type) ||
-	    !tomoyo_parse_number_union(w[3], &e.flags))
-		goto out;
-	error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-				     tomoyo_same_mount_acl, NULL);
- out:
-	tomoyo_put_name_union(&e.dev_name);
-	tomoyo_put_name_union(&e.dir_name);
-	tomoyo_put_name_union(&e.fs_type);
-	tomoyo_put_number_union(&e.flags);
-	return error;
-}
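
For reference, the flag handling in tomoyo_mount_permission() above reduces to a small peel-off sequence: each special MS_* flag is cleared from `flags` and turned into the matching pseudo fs-type string, so a single `type` string reaches the ACL check. A standalone sketch; MS_REMOUNT and MS_BIND carry their <linux/mount.h> values, but the harness is invented:

#include <stdio.h>

#define MS_REMOUNT	32
#define MS_BIND		4096

static const char *normalize(unsigned long *flags)
{
	const char *type = NULL;

	if (*flags & MS_REMOUNT) {
		type = "--remount";
		*flags &= ~MS_REMOUNT;
	}
	if (*flags & MS_BIND) {
		type = "--bind";
		*flags &= ~MS_BIND;
	}
	return type;	/* NULL: fall back to the caller's fs type */
}

int main(void)
{
	unsigned long flags = MS_BIND | 1;	/* one ordinary bit kept */
	const char *type = normalize(&flags);

	printf("%s 0x%lX\n", type ? type : "(fs type)", flags); /* --bind 0x1 */
	return 0;
}
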
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 8d95e91c9..6c601bd 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/realpath.c
  *
- * Pathname calculation functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/types.h>
@@ -70,6 +68,161 @@
 }
 
 /**
+ * tomoyo_get_absolute_path - Get the path of a dentry, ignoring the chroot'ed root.
+ *
+ * @path:   Pointer to "struct path".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ *
+ * If dentry is a directory, a trailing '/' is appended.
+ */
+static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
+				      const int buflen)
+{
+	char *pos = ERR_PTR(-ENOMEM);
+	if (buflen >= 256) {
+		struct path ns_root = { };
+		/* go to whatever namespace root we are under */
+		pos = __d_path(path, &ns_root, buffer, buflen - 1);
+		if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
+			struct inode *inode = path->dentry->d_inode;
+			if (inode && S_ISDIR(inode->i_mode)) {
+				buffer[buflen - 2] = '/';
+				buffer[buflen - 1] = '\0';
+			}
+		}
+	}
+	return pos;
+}
+
+/**
+ * tomoyo_get_dentry_path - Get the path of a dentry.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ *
+ * If dentry is a directory, a trailing '/' is appended.
+ */
+static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer,
+				    const int buflen)
+{
+	char *pos = ERR_PTR(-ENOMEM);
+	if (buflen >= 256) {
+		pos = dentry_path_raw(dentry, buffer, buflen - 1);
+		if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
+			struct inode *inode = dentry->d_inode;
+			if (inode && S_ISDIR(inode->i_mode)) {
+				buffer[buflen - 2] = '/';
+				buffer[buflen - 1] = '\0';
+			}
+		}
+	}
+	return pos;
+}
+
+/**
+ * tomoyo_get_local_path - Get the path of a dentry.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ */
+static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
+				   const int buflen)
+{
+	struct super_block *sb = dentry->d_sb;
+	char *pos = tomoyo_get_dentry_path(dentry, buffer, buflen);
+	if (IS_ERR(pos))
+		return pos;
+	/* Convert from $PID to self if $PID is current thread. */
+	if (sb->s_magic == PROC_SUPER_MAGIC && *pos == '/') {
+		char *ep;
+		const pid_t pid = (pid_t) simple_strtoul(pos + 1, &ep, 10);
+		if (*ep == '/' && pid && pid ==
+		    task_tgid_nr_ns(current, sb->s_fs_info)) {
+			pos = ep - 5;
+			if (pos < buffer)
+				goto out;
+			memmove(pos, "/self", 5);
+		}
+		goto prepend_filesystem_name;
+	}
+	/* Use filesystem name for unnamed devices. */
+	if (!MAJOR(sb->s_dev))
+		goto prepend_filesystem_name;
+	{
+		struct inode *inode = sb->s_root->d_inode;
+		/*
+		 * Use filesystem name if filesystem does not support rename()
+		 * operation.
+		 */
+		if (inode->i_op && !inode->i_op->rename)
+			goto prepend_filesystem_name;
+	}
+	/* Prepend device name. */
+	{
+		char name[64];
+		int name_len;
+		const dev_t dev = sb->s_dev;
+		name[sizeof(name) - 1] = '\0';
+		snprintf(name, sizeof(name) - 1, "dev(%u,%u):", MAJOR(dev),
+			 MINOR(dev));
+		name_len = strlen(name);
+		pos -= name_len;
+		if (pos < buffer)
+			goto out;
+		memmove(pos, name, name_len);
+		return pos;
+	}
+	/* Prepend filesystem name. */
+prepend_filesystem_name:
+	{
+		const char *name = sb->s_type->name;
+		const int name_len = strlen(name);
+		pos -= name_len + 1;
+		if (pos < buffer)
+			goto out;
+		memmove(pos, name, name_len);
+		pos[name_len] = ':';
+	}
+	return pos;
+out:
+	return ERR_PTR(-ENOMEM);
+}
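
tomoyo_get_local_path() relies on the path being built right-aligned in the buffer, which is what dentry_path_raw() produces, so a "fstype:" or "dev(major,minor):" prefix is added simply by stepping `pos` backwards while it stays inside the buffer. A self-contained demo of the idiom, with invented names:

#include <stdio.h>
#include <string.h>

/* Step pos back by strlen(name) and copy the prefix in, or fail. */
static char *prepend(char *pos, char *buffer, const char *name)
{
	const size_t len = strlen(name);

	pos -= len;
	if (pos < buffer)
		return NULL;	/* would underflow: -ENOMEM in the kernel */
	memmove(pos, name, len);
	return pos;
}

int main(void)
{
	char buf[64];
	/* Right-align the dentry path, as dentry_path_raw() would. */
	char *pos = buf + sizeof(buf) - sizeof("/self");

	memcpy(pos, "/self", sizeof("/self"));
	pos = prepend(pos, buf, "proc:");
	if (pos)
		puts(pos);	/* proc:/self */
	return 0;
}
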
+
+/**
+ * tomoyo_get_socket_name - Get the name of a socket.
+ *
+ * @path:   Pointer to "struct path".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer.
+ */
+static char *tomoyo_get_socket_name(struct path *path, char * const buffer,
+				    const int buflen)
+{
+	struct inode *inode = path->dentry->d_inode;
+	struct socket *sock = inode ? SOCKET_I(inode) : NULL;
+	struct sock *sk = sock ? sock->sk : NULL;
+	if (sk) {
+		snprintf(buffer, buflen, "socket:[family=%u:type=%u:"
+			 "protocol=%u]", sk->sk_family, sk->sk_type,
+			 sk->sk_protocol);
+	} else {
+		snprintf(buffer, buflen, "socket:[unknown]");
+	}
+	return buffer;
+}
+
+/**
  * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
  *
  * @path: Pointer to "struct path".
@@ -90,55 +243,42 @@
 	char *name = NULL;
 	unsigned int buf_len = PAGE_SIZE / 2;
 	struct dentry *dentry = path->dentry;
-	bool is_dir;
+	struct super_block *sb;
 	if (!dentry)
 		return NULL;
-	is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode);
+	sb = dentry->d_sb;
 	while (1) {
-		struct path ns_root = { .mnt = NULL, .dentry = NULL };
 		char *pos;
+		struct inode *inode;
 		buf_len <<= 1;
 		kfree(buf);
 		buf = kmalloc(buf_len, GFP_NOFS);
 		if (!buf)
 			break;
+		/* Make sure that pos is '\0'-terminated. */
+		buf[buf_len - 1] = '\0';
 		/* Get better name for socket. */
-		if (dentry->d_sb->s_magic == SOCKFS_MAGIC) {
-			struct inode *inode = dentry->d_inode;
-			struct socket *sock = inode ? SOCKET_I(inode) : NULL;
-			struct sock *sk = sock ? sock->sk : NULL;
-			if (sk) {
-				snprintf(buf, buf_len - 1, "socket:[family=%u:"
-					 "type=%u:protocol=%u]", sk->sk_family,
-					 sk->sk_type, sk->sk_protocol);
-			} else {
-				snprintf(buf, buf_len - 1, "socket:[unknown]");
-			}
-			name = tomoyo_encode(buf);
-			break;
+		if (sb->s_magic == SOCKFS_MAGIC) {
+			pos = tomoyo_get_socket_name(path, buf, buf_len - 1);
+			goto encode;
 		}
-		/* For "socket:[\$]" and "pipe:[\$]". */
+		/* For "pipe:[\$]". */
 		if (dentry->d_op && dentry->d_op->d_dname) {
 			pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
-			if (IS_ERR(pos))
-				continue;
-			name = tomoyo_encode(pos);
-			break;
+			goto encode;
 		}
-		/* If we don't have a vfsmount, we can't calculate. */
-		if (!path->mnt)
-			break;
-		/* go to whatever namespace root we are under */
-		pos = __d_path(path, &ns_root, buf, buf_len);
-		/* Prepend "/proc" prefix if using internal proc vfs mount. */
-		if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
-		    (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
-			pos -= 5;
-			if (pos >= buf)
-				memcpy(pos, "/proc", 5);
-			else
-				pos = ERR_PTR(-ENOMEM);
-		}
+		inode = sb->s_root->d_inode;
+		/*
+		 * Get local name for filesystems without rename() operation
+		 * or dentry without vfsmount.
+		 */
+		if (!path->mnt || (inode->i_op && !inode->i_op->rename))
+			pos = tomoyo_get_local_path(path->dentry, buf,
+						    buf_len - 1);
+		/* Get absolute name for the rest. */
+		else
+			pos = tomoyo_get_absolute_path(path, buf, buf_len - 1);
+encode:
 		if (IS_ERR(pos))
 			continue;
 		name = tomoyo_encode(pos);
@@ -147,16 +287,6 @@
 	kfree(buf);
 	if (!name)
 		tomoyo_warn_oom(__func__);
-	else if (is_dir && *name) {
-		/* Append trailing '/' if dentry is a directory. */
-		char *pos = name + strlen(name) - 1;
-		if (*pos != '/')
-			/*
-			 * This is OK because tomoyo_encode() reserves space
-			 * for appending "/".
-			 */
-			*++pos = '/';
-	}
 	return name;
 }
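
The surrounding loop keeps doubling the buffer until the pathname fits, a classic grow-and-retry allocation pattern. A userspace miniature, using snprintf() truncation as the does-not-fit signal (the kernel's version starts at PAGE_SIZE / 2 and reports failure through ERR_PTR instead):

#include <stdio.h>
#include <stdlib.h>

static char *format_name(const char *path)
{
	size_t len = 32;
	char *buf = NULL;

	for (;;) {
		int n;

		len <<= 1;		/* double on every attempt */
		free(buf);
		buf = malloc(len);
		if (!buf)
			return NULL;
		n = snprintf(buf, len, "prefix:%s", path);
		if (n >= 0 && (size_t)n < len)
			return buf;	/* fits: done */
		/* truncated: loop and retry with a larger buffer */
	}
}

int main(void)
{
	char *name = format_name("/a/rather/long/path");

	if (name) {
		puts(name);
		free(name);
	}
	return 0;
}
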
 
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index e43d555..a49c3bf 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -1,9 +1,7 @@
 /*
- * security/tomoyo/common.c
+ * security/tomoyo/securityfs_if.c
  *
- * Securityfs interface for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/security.h>
@@ -34,11 +32,11 @@
  */
 static int tomoyo_release(struct inode *inode, struct file *file)
 {
-	return tomoyo_close_control(file);
+	return tomoyo_close_control(file->private_data);
 }
 
 /**
- * tomoyo_poll - poll() for /proc/ccs/ interface.
+ * tomoyo_poll - poll() for /sys/kernel/security/tomoyo/ interface.
  *
  * @file: Pointer to "struct file".
  * @wait: Pointer to "poll_table".
@@ -63,7 +61,7 @@
 static ssize_t tomoyo_read(struct file *file, char __user *buf, size_t count,
 			   loff_t *ppos)
 {
-	return tomoyo_read_control(file, buf, count);
+	return tomoyo_read_control(file->private_data, buf, count);
 }
 
 /**
@@ -79,7 +77,7 @@
 static ssize_t tomoyo_write(struct file *file, const char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	return tomoyo_write_control(file, buf, count);
+	return tomoyo_write_control(file->private_data, buf, count);
 }
 
 /*
@@ -135,14 +133,14 @@
 			    TOMOYO_DOMAINPOLICY);
 	tomoyo_create_entry("exception_policy", 0600, tomoyo_dir,
 			    TOMOYO_EXCEPTIONPOLICY);
+	tomoyo_create_entry("audit",            0400, tomoyo_dir,
+			    TOMOYO_AUDIT);
 	tomoyo_create_entry("self_domain",      0400, tomoyo_dir,
 			    TOMOYO_SELFDOMAIN);
-	tomoyo_create_entry(".domain_status",   0600, tomoyo_dir,
-			    TOMOYO_DOMAIN_STATUS);
 	tomoyo_create_entry(".process_status",  0600, tomoyo_dir,
 			    TOMOYO_PROCESS_STATUS);
-	tomoyo_create_entry("meminfo",          0600, tomoyo_dir,
-			    TOMOYO_MEMINFO);
+	tomoyo_create_entry("stat",             0644, tomoyo_dir,
+			    TOMOYO_STAT);
 	tomoyo_create_entry("profile",          0600, tomoyo_dir,
 			    TOMOYO_PROFILE);
 	tomoyo_create_entry("manager",          0600, tomoyo_dir,
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 95d3f95..f776400 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -1,20 +1,35 @@
 /*
  * security/tomoyo/tomoyo.c
  *
- * LSM hooks for TOMOYO Linux.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/security.h>
 #include "common.h"
 
+/**
+ * tomoyo_cred_alloc_blank - Target for security_cred_alloc_blank().
+ *
+ * @new: Pointer to "struct cred".
+ * @gfp: Memory allocation flags.
+ *
+ * Returns 0.
+ */
 static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
 {
 	new->security = NULL;
 	return 0;
 }
 
+/**
+ * tomoyo_cred_prepare - Target for security_prepare_creds().
+ *
+ * @new: Pointer to "struct cred".
+ * @old: Pointer to "struct cred".
+ * @gfp: Memory allocation flags.
+ *
+ * Returns 0.
+ */
 static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
 			       gfp_t gfp)
 {
@@ -25,11 +40,22 @@
 	return 0;
 }
 
+/**
+ * tomoyo_cred_transfer - Target for security_transfer_creds().
+ *
+ * @new: Pointer to "struct cred".
+ * @old: Pointer to "struct cred".
+ */
 static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
 {
 	tomoyo_cred_prepare(new, old, 0);
 }
 
+/**
+ * tomoyo_cred_free - Target for security_cred_free().
+ *
+ * @cred: Pointer to "struct cred".
+ */
 static void tomoyo_cred_free(struct cred *cred)
 {
 	struct tomoyo_domain_info *domain = cred->security;
@@ -37,6 +63,13 @@
 		atomic_dec(&domain->users);
 }
 
+/**
+ * tomoyo_bprm_set_creds - Target for security_bprm_set_creds().
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
 {
 	int rc;
@@ -51,12 +84,14 @@
 	 */
 	if (bprm->cred_prepared)
 		return 0;
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
 	/*
 	 * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested
 	 * for the first time.
 	 */
 	if (!tomoyo_policy_loaded)
 		tomoyo_load_policy(bprm->filename);
+#endif
 	/*
 	 * Release reference to "struct tomoyo_domain_info" stored inside
 	 * "bprm->cred->security". New reference to "struct tomoyo_domain_info"
@@ -73,6 +108,13 @@
 	return 0;
 }
 
+/**
+ * tomoyo_bprm_check_security - Target for security_bprm_check().
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 {
 	struct tomoyo_domain_info *domain = bprm->cred->security;
@@ -90,20 +132,59 @@
 	/*
 	 * Read permission is checked against interpreters using next domain.
 	 */
-	return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
+	return tomoyo_check_open_permission(domain, &bprm->file->f_path,
+					    O_RDONLY);
 }
 
+/**
+ * tomoyo_inode_getattr - Target for security_inode_getattr().
+ *
+ * @mnt:    Pointer to "struct vfsmount".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+	struct path path = { mnt, dentry };
+	return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL);
+}
+
+/**
+ * tomoyo_path_truncate - Target for security_path_truncate().
+ *
+ * @path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_truncate(struct path *path)
 {
-	return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path);
+	return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path, NULL);
 }
 
+/**
+ * tomoyo_path_unlink - Target for security_path_unlink().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_unlink(struct path *parent, struct dentry *dentry)
 {
 	struct path path = { parent->mnt, dentry };
-	return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path);
+	return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL);
 }
 
+/**
+ * tomoyo_path_mkdir - Target for security_path_mkdir().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @mode:   DAC permission mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry,
 			     int mode)
 {
@@ -112,19 +193,46 @@
 				       mode & S_IALLUGO);
 }
 
+/**
+ * tomoyo_path_rmdir - Target for security_path_rmdir().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_rmdir(struct path *parent, struct dentry *dentry)
 {
 	struct path path = { parent->mnt, dentry };
-	return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path);
+	return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL);
 }
 
+/**
+ * tomoyo_path_symlink - Target for security_path_symlink().
+ *
+ * @parent:   Pointer to "struct path".
+ * @dentry:   Pointer to "struct dentry".
+ * @old_name: Symlink's content.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry,
 			       const char *old_name)
 {
 	struct path path = { parent->mnt, dentry };
-	return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path);
+	return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path, old_name);
 }
 
+/**
+ * tomoyo_path_mknod - Target for security_path_mknod().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @mode:   DAC permission mode.
+ * @dev:    Device attributes.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry,
 			     int mode, unsigned int dev)
 {
@@ -155,6 +263,15 @@
 	return tomoyo_path_number_perm(type, &path, perm);
 }
 
+/**
+ * tomoyo_path_link - Target for security_path_link().
+ *
+ * @old_dentry: Pointer to "struct dentry".
+ * @new_dir:    Pointer to "struct path".
+ * @new_dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir,
 			    struct dentry *new_dentry)
 {
@@ -163,6 +280,16 @@
 	return tomoyo_path2_perm(TOMOYO_TYPE_LINK, &path1, &path2);
 }
 
+/**
+ * tomoyo_path_rename - Target for security_path_rename().
+ *
+ * @old_parent: Pointer to "struct path".
+ * @old_dentry: Pointer to "struct dentry".
+ * @new_parent: Pointer to "struct path".
+ * @new_dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_rename(struct path *old_parent,
 			      struct dentry *old_dentry,
 			      struct path *new_parent,
@@ -173,14 +300,32 @@
 	return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2);
 }
 
+/**
+ * tomoyo_file_fcntl - Target for security_file_fcntl().
+ *
+ * @file: Pointer to "struct file".
+ * @cmd:  Command for fcntl().
+ * @arg:  Argument for @cmd.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
 			     unsigned long arg)
 {
-	if (cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND))
-		return tomoyo_path_perm(TOMOYO_TYPE_REWRITE, &file->f_path);
-	return 0;
+	if (!(cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND)))
+		return 0;
+	return tomoyo_check_open_permission(tomoyo_domain(), &file->f_path,
+					    O_WRONLY | (arg & O_APPEND));
 }
 
+/**
+ * tomoyo_dentry_open - Target for security_dentry_open().
+ *
+ * @f:    Pointer to "struct file".
+ * @cred: Pointer to "struct cred".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
 {
 	int flags = f->f_flags;
@@ -190,12 +335,30 @@
 	return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags);
 }
 
+/**
+ * tomoyo_file_ioctl - Target for security_file_ioctl().
+ *
+ * @file: Pointer to "struct file".
+ * @cmd:  Command for ioctl().
+ * @arg:  Argument for @cmd.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_file_ioctl(struct file *file, unsigned int cmd,
 			     unsigned long arg)
 {
 	return tomoyo_path_number_perm(TOMOYO_TYPE_IOCTL, &file->f_path, cmd);
 }
 
+/**
+ * tomoyo_path_chmod - Target for security_path_chmod().
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @mnt:    Pointer to "struct vfsmount".
+ * @mode:   DAC permission mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
 			     mode_t mode)
 {
@@ -204,6 +367,15 @@
 				       mode & S_IALLUGO);
 }
 
+/**
+ * tomoyo_path_chown - Target for security_path_chown().
+ *
+ * @path: Pointer to "struct path".
+ * @uid:  Owner ID.
+ * @gid:  Group ID.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid)
 {
 	int error = 0;
@@ -214,23 +386,57 @@
 	return error;
 }
 
+/**
+ * tomoyo_path_chroot - Target for security_path_chroot().
+ *
+ * @path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_chroot(struct path *path)
 {
-	return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path);
+	return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path, NULL);
 }
 
+/**
+ * tomoyo_sb_mount - Target for security_sb_mount().
+ *
+ * @dev_name: Name of device file. May be NULL.
+ * @path:     Pointer to "struct path".
+ * @type:     Name of filesystem type. May be NULL.
+ * @flags:    Mount options.
+ * @data:     Optional data. May be NULL.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_sb_mount(char *dev_name, struct path *path,
 			   char *type, unsigned long flags, void *data)
 {
 	return tomoyo_mount_permission(dev_name, path, type, flags, data);
 }
 
+/**
+ * tomoyo_sb_umount - Target for security_sb_umount().
+ *
+ * @mnt:   Pointer to "struct vfsmount".
+ * @flags: Unmount options.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_sb_umount(struct vfsmount *mnt, int flags)
 {
 	struct path path = { mnt, mnt->mnt_root };
-	return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path);
+	return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path, NULL);
 }
 
+/**
+ * tomoyo_sb_pivotroot - Target for security_sb_pivotroot().
+ *
+ * @old_path: Pointer to "struct path".
+ * @new_path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_sb_pivotroot(struct path *old_path, struct path *new_path)
 {
 	return tomoyo_path2_perm(TOMOYO_TYPE_PIVOT_ROOT, new_path, old_path);
@@ -258,6 +464,7 @@
 	.path_mknod          = tomoyo_path_mknod,
 	.path_link           = tomoyo_path_link,
 	.path_rename         = tomoyo_path_rename,
+	.inode_getattr       = tomoyo_inode_getattr,
 	.file_ioctl          = tomoyo_file_ioctl,
 	.path_chmod          = tomoyo_path_chmod,
 	.path_chown          = tomoyo_path_chown,
@@ -270,6 +477,11 @@
 /* Lock for GC. */
 struct srcu_struct tomoyo_ss;
 
+/**
+ * tomoyo_init - Register TOMOYO Linux as an LSM module.
+ *
+ * Returns 0.
+ */
 static int __init tomoyo_init(void)
 {
 	struct cred *cred = (struct cred *) current_cred();
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 6d53932..c36bd11 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/util.c
  *
- * Utility functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/slab.h>
@@ -15,18 +13,130 @@
 /* Has /sbin/init started? */
 bool tomoyo_policy_loaded;
 
+/*
+ * Mapping table from "enum tomoyo_mac_index" to
+ * "enum tomoyo_mac_category_index".
+ */
+const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = {
+	/* CONFIG::file group */
+	[TOMOYO_MAC_FILE_EXECUTE]    = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_OPEN]       = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_CREATE]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_UNLINK]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_GETATTR]    = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_MKDIR]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_RMDIR]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_MKFIFO]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_MKSOCK]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_TRUNCATE]   = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_SYMLINK]    = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_MKBLOCK]    = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_MKCHAR]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_LINK]       = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_RENAME]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_CHMOD]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_CHOWN]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_CHGRP]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_IOCTL]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_CHROOT]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_MOUNT]      = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_UMOUNT]     = TOMOYO_MAC_CATEGORY_FILE,
+	[TOMOYO_MAC_FILE_PIVOT_ROOT] = TOMOYO_MAC_CATEGORY_FILE,
+};
+
+/**
+ * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh:mm:ss.
+ *
+ * @time:  Seconds since 1970/01/01 00:00:00.
+ * @stamp: Pointer to "struct tomoyo_time".
+ *
+ * Returns nothing.
+ *
+ * This function does not handle the Y2038 problem.
+ */
+void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp)
+{
+	static const u16 tomoyo_eom[2][12] = {
+		{ 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+		{ 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+	};
+	u16 y;
+	u8 m;
+	bool r;
+	stamp->sec = time % 60;
+	time /= 60;
+	stamp->min = time % 60;
+	time /= 60;
+	stamp->hour = time % 24;
+	time /= 24;
+	for (y = 1970; ; y++) {
+		const unsigned short days = (y & 3) ? 365 : 366;
+		if (time < days)
+			break;
+		time -= days;
+	}
+	r = (y & 3) == 0;
+	for (m = 0; m < 11 && time >= tomoyo_eom[r][m]; m++)
+		;
+	if (m)
+		time -= tomoyo_eom[r][m - 1];
+	stamp->year = y;
+	stamp->month = ++m;
+	stamp->day = ++time;
+}
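
A standalone rendition of the conversion above, useful for sanity-checking it: the `(y & 3)` leap-year test treats every fourth year as a leap year, which is exact from 1970 through 2099 because 2000 is divisible by 400, and that pairs with the Y2038 caveat in the comment. The harness is ours, not part of the patch.

#include <stdio.h>

static const unsigned short eom[2][12] = {
	{ 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
	{ 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
};

int main(void)
{
	long t = 1000000000;	/* expect 2001/09/09 01:46:40 UTC */
	unsigned sec = t % 60; t /= 60;
	unsigned min = t % 60; t /= 60;
	unsigned hour = t % 24; t /= 24;
	unsigned y, m, r;

	for (y = 1970; ; y++) {
		const unsigned days = (y & 3) ? 365 : 366;

		if (t < days)
			break;
		t -= days;
	}
	r = (y & 3) == 0;
	for (m = 0; m < 11 && t >= eom[r][m]; m++)
		;
	if (m)
		t -= eom[r][m - 1];
	printf("%04u/%02u/%02u %02u:%02u:%02u\n",
	       y, m + 1, (unsigned)t + 1, hour, min, sec);
	return 0;
}
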
+
+/**
+ * tomoyo_permstr - Find permission keywords.
+ *
+ * @string: String representation for permissions in foo/bar/buz format.
+ * @keyword: Keyword to search for in @string.
+ *
+ * Returns true if @keyword was found in @string, false otherwise.
+ *
+ * This function assumes that strncmp(w1, w2, strlen(w1)) != 0 if w1 != w2.
+ */
+bool tomoyo_permstr(const char *string, const char *keyword)
+{
+	const char *cp = strstr(string, keyword);
+	if (cp)
+		return cp == string || *(cp - 1) == '/';
+	return false;
+}
+
+/**
+ * tomoyo_read_token - Read a word from a line.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns a word on success, "" otherwise.
+ *
+ * To allow the caller to skip the NULL check, this function returns "" rather
+ * than NULL if there are no more words to read.
+ */
+char *tomoyo_read_token(struct tomoyo_acl_param *param)
+{
+	char *pos = param->data;
+	char *del = strchr(pos, ' ');
+	if (del)
+		*del++ = '\0';
+	else
+		del = pos + strlen(pos);
+	param->data = del;
+	return pos;
+}
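
A usage sketch for the tokenizer above: each call consumes one space-delimited word, mutating the buffer in place, and yields "" once the line is exhausted, so callers can chain calls without NULL checks. The reduced param struct and the harness are illustrative.

#include <stdio.h>
#include <string.h>

struct acl_param { char *data; };

static char *read_token(struct acl_param *param)
{
	char *pos = param->data;
	char *del = strchr(pos, ' ');

	if (del)
		*del++ = '\0';		/* cut the word out in place */
	else
		del = pos + strlen(pos);	/* last word: park at '\0' */
	param->data = del;
	return pos;
}

int main(void)
{
	char line[] = "file mount /dev/sda1 /mnt";
	struct acl_param param = { line };
	const char *word;

	while (*(word = read_token(&param)))
		printf("token: %s\n", word);
	return 0;
}
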
+
 /**
  * tomoyo_parse_ulong - Parse an "unsigned long" value.
  *
  * @result: Pointer to "unsigned long".
  * @str:    Pointer to string to parse.
  *
- * Returns value type on success, 0 otherwise.
+ * Returns one of the values in "enum tomoyo_value_type".
  *
  * The @src is updated to point the first character after the value
  * on success.
  */
-static u8 tomoyo_parse_ulong(unsigned long *result, char **str)
+u8 tomoyo_parse_ulong(unsigned long *result, char **str)
 {
 	const char *cp = *str;
 	char *ep;
@@ -43,7 +153,7 @@
 	}
 	*result = simple_strtoul(cp, &ep, base);
 	if (cp == ep)
-		return 0;
+		return TOMOYO_VALUE_TYPE_INVALID;
 	*str = ep;
 	switch (base) {
 	case 16:
@@ -81,63 +191,65 @@
 /**
  * tomoyo_parse_name_union - Parse a tomoyo_name_union.
  *
- * @filename: Name or name group.
- * @ptr:      Pointer to "struct tomoyo_name_union".
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr:   Pointer to "struct tomoyo_name_union".
  *
  * Returns true on success, false otherwise.
  */
-bool tomoyo_parse_name_union(const char *filename,
+bool tomoyo_parse_name_union(struct tomoyo_acl_param *param,
 			     struct tomoyo_name_union *ptr)
 {
-	if (!tomoyo_correct_word(filename))
-		return false;
-	if (filename[0] == '@') {
-		ptr->group = tomoyo_get_group(filename + 1, TOMOYO_PATH_GROUP);
-		ptr->is_group = true;
+	char *filename;
+	if (param->data[0] == '@') {
+		param->data++;
+		ptr->group = tomoyo_get_group(param, TOMOYO_PATH_GROUP);
 		return ptr->group != NULL;
 	}
+	filename = tomoyo_read_token(param);
+	if (!tomoyo_correct_word(filename))
+		return false;
 	ptr->filename = tomoyo_get_name(filename);
-	ptr->is_group = false;
 	return ptr->filename != NULL;
 }
 
 /**
  * tomoyo_parse_number_union - Parse a tomoyo_number_union.
  *
- * @data: Number or number range or number group.
- * @ptr:  Pointer to "struct tomoyo_number_union".
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr:   Pointer to "struct tomoyo_number_union".
  *
  * Returns true on success, false otherwise.
  */
-bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num)
+bool tomoyo_parse_number_union(struct tomoyo_acl_param *param,
+			       struct tomoyo_number_union *ptr)
 {
+	char *data;
 	u8 type;
 	unsigned long v;
-	memset(num, 0, sizeof(*num));
-	if (data[0] == '@') {
-		if (!tomoyo_correct_word(data))
-			return false;
-		num->group = tomoyo_get_group(data + 1, TOMOYO_NUMBER_GROUP);
-		num->is_group = true;
-		return num->group != NULL;
+	memset(ptr, 0, sizeof(*ptr));
+	if (param->data[0] == '@') {
+		param->data++;
+		ptr->group = tomoyo_get_group(param, TOMOYO_NUMBER_GROUP);
+		return ptr->group != NULL;
 	}
+	data = tomoyo_read_token(param);
 	type = tomoyo_parse_ulong(&v, &data);
-	if (!type)
+	if (type == TOMOYO_VALUE_TYPE_INVALID)
 		return false;
-	num->values[0] = v;
-	num->min_type = type;
+	ptr->values[0] = v;
+	ptr->value_type[0] = type;
 	if (!*data) {
-		num->values[1] = v;
-		num->max_type = type;
+		ptr->values[1] = v;
+		ptr->value_type[1] = type;
 		return true;
 	}
 	if (*data++ != '-')
 		return false;
 	type = tomoyo_parse_ulong(&v, &data);
-	if (!type || *data)
+	if (type == TOMOYO_VALUE_TYPE_INVALID || *data || ptr->values[0] > v)
 		return false;
-	num->values[1] = v;
-	num->max_type = type;
+	ptr->values[1] = v;
+	ptr->value_type[1] = type;
 	return true;
 }
 
@@ -185,6 +297,30 @@
 }
 
 /**
+ * tomoyo_valid - Check whether the character is a valid char.
+ *
+ * @c: The character to check.
+ *
+ * Returns true if @c is a valid character, false otherwise.
+ */
+static inline bool tomoyo_valid(const unsigned char c)
+{
+	return c > ' ' && c < 127;
+}
+
+/**
+ * tomoyo_invalid - Check whether the character is an invalid char.
+ *
+ * @c: The character to check.
+ *
+ * Returns true if @c is an invalid character, false otherwise.
+ */
+static inline bool tomoyo_invalid(const unsigned char c)
+{
+	return c && (c <= ' ' || c >= 127);
+}
+
+/**
  * tomoyo_str_starts - Check whether the given string starts with the given keyword.
  *
  * @src:  Pointer to pointer to the string.
@@ -238,36 +374,9 @@
 }
 
 /**
- * tomoyo_tokenize - Tokenize string.
- *
- * @buffer: The line to tokenize.
- * @w:      Pointer to "char *".
- * @size:   Sizeof @w .
- *
- * Returns true on success, false otherwise.
- */
-bool tomoyo_tokenize(char *buffer, char *w[], size_t size)
-{
-	int count = size / sizeof(char *);
-	int i;
-	for (i = 0; i < count; i++)
-		w[i] = "";
-	for (i = 0; i < count; i++) {
-		char *cp = strchr(buffer, ' ');
-		if (cp)
-			*cp = '\0';
-		w[i] = buffer;
-		if (!cp)
-			break;
-		buffer = cp + 1;
-	}
-	return i < count || !*buffer;
-}
-
-/**
  * tomoyo_correct_word2 - Validate a string.
  *
- * @string: The string to check. May be non-'\0'-terminated.
+ * @string: The string to check. May be non-'\0'-terminated.
  * @len:    Length of @string.
  *
  * Check whether the given string follows the naming rules.
@@ -377,26 +486,21 @@
  */
 bool tomoyo_correct_domain(const unsigned char *domainname)
 {
-	if (!domainname || strncmp(domainname, TOMOYO_ROOT_NAME,
-				   TOMOYO_ROOT_NAME_LEN))
-		goto out;
-	domainname += TOMOYO_ROOT_NAME_LEN;
-	if (!*domainname)
+	if (!domainname || !tomoyo_domain_def(domainname))
+		return false;
+	domainname = strchr(domainname, ' ');
+	if (!domainname++)
 		return true;
-	if (*domainname++ != ' ')
-		goto out;
 	while (1) {
 		const unsigned char *cp = strchr(domainname, ' ');
 		if (!cp)
 			break;
 		if (*domainname != '/' ||
 		    !tomoyo_correct_word2(domainname, cp - domainname))
-			goto out;
+			return false;
 		domainname = cp + 1;
 	}
 	return tomoyo_correct_path(domainname);
- out:
-	return false;
 }
 
 /**
@@ -408,7 +512,19 @@
  */
 bool tomoyo_domain_def(const unsigned char *buffer)
 {
-	return !strncmp(buffer, TOMOYO_ROOT_NAME, TOMOYO_ROOT_NAME_LEN);
+	const unsigned char *cp;
+	int len;
+	if (*buffer != '<')
+		return false;
+	cp = strchr(buffer, ' ');
+	if (!cp)
+		len = strlen(buffer);
+	else
+		len = cp - buffer;
+	if (buffer[len - 1] != '>' ||
+	    !tomoyo_correct_word2(buffer + 1, len - 2))
+		return false;
+	return true;
 }
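
The relaxed check above means any "<token>" can now open a domain name, not just the fixed "<kernel>" string, which is what allows per-namespace policy roots. A hedged standalone rendition; the simple length test stands in for the real tomoyo_correct_word2() validation:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool domain_def(const char *buffer)
{
	const char *cp;
	size_t len;

	if (*buffer != '<')
		return false;
	cp = strchr(buffer, ' ');
	len = cp ? (size_t)(cp - buffer) : strlen(buffer);
	/* Must look like "<word>"; the kernel also validates the word. */
	return len >= 3 && buffer[len - 1] == '>';
}

int main(void)
{
	printf("%d %d %d\n",
	       domain_def("<kernel> /sbin/init"),	/* 1 */
	       domain_def("<ns1>"),			/* 1 */
	       domain_def("kernel"));			/* 0 */
	return 0;
}
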
 
 /**
@@ -794,22 +910,24 @@
 /**
  * tomoyo_get_mode - Get MAC mode.
  *
+ * @ns:      Pointer to "struct tomoyo_policy_namespace".
  * @profile: Profile number.
  * @index:   Index number of functionality.
  *
  * Returns mode.
  */
-int tomoyo_get_mode(const u8 profile, const u8 index)
+int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
+		    const u8 index)
 {
 	u8 mode;
 	const u8 category = TOMOYO_MAC_CATEGORY_FILE;
 	if (!tomoyo_policy_loaded)
 		return TOMOYO_CONFIG_DISABLED;
-	mode = tomoyo_profile(profile)->config[index];
+	mode = tomoyo_profile(ns, profile)->config[index];
 	if (mode == TOMOYO_CONFIG_USE_DEFAULT)
-		mode = tomoyo_profile(profile)->config[category];
+		mode = tomoyo_profile(ns, profile)->config[category];
 	if (mode == TOMOYO_CONFIG_USE_DEFAULT)
-		mode = tomoyo_profile(profile)->default_config;
+		mode = tomoyo_profile(ns, profile)->default_config;
 	return mode & 3;
 }
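
The fallback chain in tomoyo_get_mode() in miniature: a per-functionality setting of TOMOYO_CONFIG_USE_DEFAULT falls through to the category's setting and then to the profile-wide default. The constants and profile layout below are simplified stand-ins for the kernel structures:

#include <stdio.h>

enum { CONFIG_DISABLED, CONFIG_LEARNING, CONFIG_PERMISSIVE,
       CONFIG_ENFORCING, CONFIG_USE_DEFAULT = 255 };

struct profile {
	unsigned char config[8];	/* per-index and per-category slots */
	unsigned char default_config;
};

static int get_mode(const struct profile *p, unsigned idx, unsigned cat)
{
	unsigned char mode = p->config[idx];

	if (mode == CONFIG_USE_DEFAULT)
		mode = p->config[cat];		/* category fallback */
	if (mode == CONFIG_USE_DEFAULT)
		mode = p->default_config;	/* profile-wide fallback */
	return mode & 3;
}

int main(void)
{
	struct profile p = { .default_config = CONFIG_ENFORCING };

	p.config[0] = CONFIG_USE_DEFAULT;	/* e.g. file open */
	p.config[7] = CONFIG_USE_DEFAULT;	/* e.g. file category */
	printf("mode=%d\n", get_mode(&p, 0, 7));	/* 3: enforcing */
	return 0;
}
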
 
@@ -833,65 +951,11 @@
 	profile = domain->profile;
 	r->profile = profile;
 	r->type = index;
-	r->mode = tomoyo_get_mode(profile, index);
+	r->mode = tomoyo_get_mode(domain->ns, profile, index);
 	return r->mode;
 }
 
 /**
- * tomoyo_last_word - Get last component of a line.
- *
- * @line: A line.
- *
- * Returns the last word of a line.
- */
-const char *tomoyo_last_word(const char *name)
-{
-	const char *cp = strrchr(name, ' ');
-	if (cp)
-		return cp + 1;
-	return name;
-}
-
-/**
- * tomoyo_warn_log - Print warning or error message on console.
- *
- * @r:   Pointer to "struct tomoyo_request_info".
- * @fmt: The printf()'s format string, followed by parameters.
- */
-void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...)
-{
-	va_list args;
-	char *buffer;
-	const struct tomoyo_domain_info * const domain = r->domain;
-	const struct tomoyo_profile *profile = tomoyo_profile(domain->profile);
-	switch (r->mode) {
-	case TOMOYO_CONFIG_ENFORCING:
-		if (!profile->enforcing->enforcing_verbose)
-			return;
-		break;
-	case TOMOYO_CONFIG_PERMISSIVE:
-		if (!profile->permissive->permissive_verbose)
-			return;
-		break;
-	case TOMOYO_CONFIG_LEARNING:
-		if (!profile->learning->learning_verbose)
-			return;
-		break;
-	}
-	buffer = kmalloc(4096, GFP_NOFS);
-	if (!buffer)
-		return;
-	va_start(args, fmt);
-	vsnprintf(buffer, 4095, fmt, args);
-	va_end(args);
-	buffer[4095] = '\0';
-	printk(KERN_WARNING "%s: Access %s denied for %s\n",
-	       r->mode == TOMOYO_CONFIG_ENFORCING ? "ERROR" : "WARNING", buffer,
-	       tomoyo_last_word(domain->domainname->name));
-	kfree(buffer);
-}
-
-/**
  * tomoyo_domain_quota_is_ok - Check for domain's quota.
  *
  * @r: Pointer to "struct tomoyo_request_info".
@@ -911,52 +975,43 @@
 	if (!domain)
 		return true;
 	list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+		u16 perm;
+		u8 i;
 		if (ptr->is_deleted)
 			continue;
 		switch (ptr->type) {
-			u16 perm;
-			u8 i;
 		case TOMOYO_TYPE_PATH_ACL:
 			perm = container_of(ptr, struct tomoyo_path_acl, head)
 				->perm;
-			for (i = 0; i < TOMOYO_MAX_PATH_OPERATION; i++)
-				if (perm & (1 << i))
-					count++;
-			if (perm & (1 << TOMOYO_TYPE_READ_WRITE))
-				count -= 2;
 			break;
 		case TOMOYO_TYPE_PATH2_ACL:
 			perm = container_of(ptr, struct tomoyo_path2_acl, head)
 				->perm;
-			for (i = 0; i < TOMOYO_MAX_PATH2_OPERATION; i++)
-				if (perm & (1 << i))
-					count++;
 			break;
 		case TOMOYO_TYPE_PATH_NUMBER_ACL:
 			perm = container_of(ptr, struct tomoyo_path_number_acl,
 					    head)->perm;
-			for (i = 0; i < TOMOYO_MAX_PATH_NUMBER_OPERATION; i++)
-				if (perm & (1 << i))
-					count++;
 			break;
 		case TOMOYO_TYPE_MKDEV_ACL:
 			perm = container_of(ptr, struct tomoyo_mkdev_acl,
 					    head)->perm;
-			for (i = 0; i < TOMOYO_MAX_MKDEV_OPERATION; i++)
-				if (perm & (1 << i))
-					count++;
 			break;
 		default:
-			count++;
+			perm = 1;
 		}
+		for (i = 0; i < 16; i++)
+			if (perm & (1 << i))
+				count++;
 	}
-	if (count < tomoyo_profile(domain->profile)->learning->
-	    learning_max_entry)
+	if (count < tomoyo_profile(domain->ns, domain->profile)->
+	    pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
 		return true;
-	if (!domain->quota_warned) {
-		domain->quota_warned = true;
-		printk(KERN_WARNING "TOMOYO-WARNING: "
-		       "Domain '%s' has so many ACLs to hold. "
+	if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
+		domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
+		/* r->granted = false; */
+		tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+		printk(KERN_WARNING "WARNING: "
+		       "Domain '%s' has too many ACLs to hold. "
 		       "Stopped learning mode.\n", domain->domainname->name);
 	}
 	return false;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index f134130..86d0caf 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -128,7 +128,8 @@
 	}
 }
 
-static void pcm_debug_name(struct snd_pcm_substream *substream,
+#ifdef CONFIG_SND_DEBUG
+void snd_pcm_debug_name(struct snd_pcm_substream *substream,
 			   char *name, size_t len)
 {
 	snprintf(name, len, "pcmC%dD%d%c:%d",
@@ -137,6 +138,8 @@
 		 substream->stream ? 'c' : 'p',
 		 substream->number);
 }
+EXPORT_SYMBOL(snd_pcm_debug_name);
+#endif
 
 #define XRUN_DEBUG_BASIC	(1<<0)
 #define XRUN_DEBUG_STACK	(1<<1)	/* dump also stack */
@@ -168,7 +171,7 @@
 	snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
 	if (xrun_debug(substream, XRUN_DEBUG_BASIC)) {
 		char name[16];
-		pcm_debug_name(substream, name, sizeof(name));
+		snd_pcm_debug_name(substream, name, sizeof(name));
 		snd_printd(KERN_DEBUG "XRUN: %s\n", name);
 		dump_stack_on_xrun(substream);
 	}
@@ -243,7 +246,7 @@
 		return;
 	if (xrun_debug(substream, XRUN_DEBUG_LOGONCE) && log->hit)
 		return;
-	pcm_debug_name(substream, name, sizeof(name));
+	snd_pcm_debug_name(substream, name, sizeof(name));
 	for (cnt = 0, idx = log->idx; cnt < XRUN_LOG_CNT; cnt++) {
 		entry = &log->entries[idx];
 		if (entry->period_size == 0)
@@ -319,7 +322,7 @@
 	if (pos >= runtime->buffer_size) {
 		if (printk_ratelimit()) {
 			char name[16];
-			pcm_debug_name(substream, name, sizeof(name));
+			snd_pcm_debug_name(substream, name, sizeof(name));
 			xrun_log_show(substream);
 			snd_printd(KERN_ERR  "BUG: %s, pos = %ld, "
 				   "buffer size = %ld, period size = %ld\n",
@@ -364,7 +367,7 @@
 	if (xrun_debug(substream, in_interrupt ?
 			XRUN_DEBUG_PERIODUPDATE : XRUN_DEBUG_HWPTRUPDATE)) {
 		char name[16];
-		pcm_debug_name(substream, name, sizeof(name));
+		snd_pcm_debug_name(substream, name, sizeof(name));
 		snd_printd("%s_update: %s: pos=%u/%u/%u, "
 			   "hwptr=%ld/%ld/%ld/%ld\n",
 			   in_interrupt ? "period" : "hwptr",
diff --git a/sound/isa/msnd/msnd.h b/sound/isa/msnd/msnd.h
index 3773e24..a168ba3 100644
--- a/sound/isa/msnd/msnd.h
+++ b/sound/isa/msnd/msnd.h
@@ -249,7 +249,7 @@
 
 	/* State variables */
 	enum { msndClassic, msndPinnacle } type;
-	mode_t mode;
+	fmode_t mode;
 	unsigned long flags;
 #define F_RESETTING			0
 #define F_HAVEDIGITAL			1
diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c
index 4d2a6ae9..8a197fd 100644
--- a/sound/oss/ad1848.c
+++ b/sound/oss/ad1848.c
@@ -458,7 +458,7 @@
 	return mask;
 }
 
-static void change_bits(ad1848_info * devc, unsigned char *regval,
+static void oss_change_bits(ad1848_info *devc, unsigned char *regval,
 			unsigned char *muteval, int dev, int chn, int newval)
 {
 	unsigned char mask;
@@ -516,10 +516,10 @@
 
 	if (muteregoffs != regoffs) {
 		muteval = ad_read(devc, muteregoffs);
-		change_bits(devc, &val, &muteval, dev, channel, value);
+		oss_change_bits(devc, &val, &muteval, dev, channel, value);
 	}
 	else
-		change_bits(devc, &val, &val, dev, channel, value);
+		oss_change_bits(devc, &val, &val, dev, channel, value);
 
 	spin_lock_irqsave(&devc->lock,flags);
 	ad_write(devc, regoffs, val);
diff --git a/sound/oss/sb_mixer.c b/sound/oss/sb_mixer.c
index 2039d31..f8f3b7a 100644
--- a/sound/oss/sb_mixer.c
+++ b/sound/oss/sb_mixer.c
@@ -232,7 +232,7 @@
 	return 1;
 }
 
-static void change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval)
+static void oss_change_bits(sb_devc *devc, unsigned char *regval, int dev, int chn, int newval)
 {
 	unsigned char mask;
 	int shift;
@@ -284,7 +284,7 @@
 		return -EINVAL;
 
 	val = sb_getmixer(devc, regoffs);
-	change_bits(devc, &val, dev, LEFT_CHN, left);
+	oss_change_bits(devc, &val, dev, LEFT_CHN, left);
 
 	if ((*devc->iomap)[dev][RIGHT_CHN].regno != regoffs)	/*
 								 * Change register
@@ -304,7 +304,7 @@
 							 * Read the new one
 							 */
 	}
-	change_bits(devc, &val, dev, RIGHT_CHN, right);
+	oss_change_bits(devc, &val, dev, RIGHT_CHN, right);
 
 	sb_setmixer(devc, regoffs, val);
 
diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index b941d25..eae62eb 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c
@@ -41,31 +41,10 @@
 #include <sound/tlv.h>
 #include <sound/hwdep.h>
 
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AudioScience inc. <support@audioscience.com>");
 MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx");
 
-#if defined CONFIG_SND_DEBUG
-/* copied from pcm_lib.c, hope later patch will make that version public
-and this copy can be removed */
-static inline void
-snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
-{
-	snprintf(buf, size, "pcmC%dD%d%c:%d",
-		 substream->pcm->card->number,
-		 substream->pcm->device,
-		 substream->stream ? 'c' : 'p',
-		 substream->number);
-}
-#else
-static inline void
-snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
-{
-	*buf = 0;
-}
-#endif
-
 #if defined CONFIG_SND_DEBUG_VERBOSE
 /**
  * snd_printddd - very verbose debug printk
diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index 65fcf47..9683f84 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -107,7 +107,6 @@
 	union hpi_response_buffer_v1 *hr;
 	u16 res_max_size;
 	u32 uncopied_bytes;
-	struct hpi_adapter *pa = NULL;
 	int err = 0;
 
 	if (cmd != HPI_IOCTL_LINUX)
@@ -182,8 +181,9 @@
 		/* -1=no data 0=read from user mem, 1=write to user mem */
 		int wrflag = -1;
 		u32 adapter = hm->h.adapter_index;
+		struct hpi_adapter *pa = &adapters[adapter];
 
-		if ((adapter > HPI_MAX_ADAPTERS) || (!pa->type)) {
+		if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) {
 			hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER,
 				HPI_ADAPTER_OPEN,
 				HPI_ERROR_BAD_ADAPTER_NUMBER);
@@ -197,9 +197,7 @@
 			goto out;
 		}
 
-		pa = &adapters[adapter];
-
-		if (mutex_lock_interruptible(&adapters[adapter].mutex)) {
+		if (mutex_lock_interruptible(&pa->mutex)) {
 			err = -EINTR;
 			goto out;
 		}
@@ -235,8 +233,7 @@
 							"stream buffer size %d\n",
 							size);
 
-						mutex_unlock(&adapters
-							[adapter].mutex);
+						mutex_unlock(&pa->mutex);
 						err = -EINVAL;
 						goto out;
 					}
@@ -277,7 +274,7 @@
 					uncopied_bytes, size);
 		}
 
-		mutex_unlock(&adapters[adapter].mutex);
+		mutex_unlock(&pa->mutex);
 	}
 
 	/* on return response size must be set */
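
Two fixes ride together in the hunk above: the adapter index guard becomes `>=` because index HPI_MAX_ADAPTERS is already one past the end of adapters[], and `pa` is now looked up before its first dereference (the old code tested pa->type while pa was still NULL). The off-by-one in isolation, as a hedged sketch:

#include <stdbool.h>

#define HPI_MAX_ADAPTERS 20

struct hpi_adapter { int type; };
static struct hpi_adapter adapters[HPI_MAX_ADAPTERS];

static bool adapter_valid(unsigned int adapter)
{
	/* adapter == HPI_MAX_ADAPTERS would read one past the end */
	return adapter < HPI_MAX_ADAPTERS && adapters[adapter].type;
}

int main(void)
{
	adapters[3].type = 1;
	return !(adapter_valid(3) && !adapter_valid(HPI_MAX_ADAPTERS));
}
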
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 7489b46..bb7e102d6 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -243,6 +243,7 @@
 
 config SND_HDA_POWER_SAVE
 	bool "Aggressive power-saving on HD-audio"
+	depends on PM
 	help
 	  Say Y here to enable more aggressive power-saving mode on
 	  HD-audio driver.  The power-saving timeout can be configured
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 9c27a3a..3e7850c 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -91,8 +91,10 @@
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 static void hda_power_work(struct work_struct *work);
 static void hda_keep_power_on(struct hda_codec *codec);
+#define hda_codec_is_power_on(codec)	((codec)->power_on)
 #else
 static inline void hda_keep_power_on(struct hda_codec *codec) {}
+#define hda_codec_is_power_on(codec)	1
 #endif
 
 /**
@@ -1101,7 +1103,7 @@
 }
 EXPORT_SYMBOL_HDA(snd_hda_shutup_pins);
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /* Restore the pin controls cleared previously via snd_hda_shutup_pins() */
 static void restore_shutup_pins(struct hda_codec *codec)
 {
@@ -1499,7 +1501,7 @@
 	}
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /* clean up all streams; called from suspend */
 static void hda_cleanup_all_streams(struct hda_codec *codec)
 {
@@ -1838,7 +1840,7 @@
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_amp_stereo);
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /**
  * snd_hda_codec_resume_amp - Resume all AMP commands from the cache
  * @codec: HD-audio codec
@@ -1868,7 +1870,7 @@
 	}
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_resume_amp);
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
 static u32 get_amp_max_value(struct hda_codec *codec, hda_nid_t nid, int dir,
 			     unsigned int ofs)
@@ -3082,7 +3084,7 @@
 }
 EXPORT_SYMBOL_HDA(snd_hda_create_spdif_in_ctls);
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /*
  * command cache
  */
@@ -3199,53 +3201,32 @@
 					  seq->param);
 }
 EXPORT_SYMBOL_HDA(snd_hda_sequence_write_cache);
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
-/*
- * set power state of the codec
- */
-static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
-				unsigned int power_state)
+void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg,
+				    unsigned int power_state,
+				    bool eapd_workaround)
 {
-	hda_nid_t nid;
+	hda_nid_t nid = codec->start_nid;
 	int i;
 
-	/* this delay seems necessary to avoid click noise at power-down */
-	if (power_state == AC_PWRST_D3)
-		msleep(100);
-	snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
-			    power_state);
-	/* partial workaround for "azx_get_response timeout" */
-	if (power_state == AC_PWRST_D0 &&
-	    (codec->vendor_id & 0xffff0000) == 0x14f10000)
-		msleep(10);
-
-	nid = codec->start_nid;
 	for (i = 0; i < codec->num_nodes; i++, nid++) {
 		unsigned int wcaps = get_wcaps(codec, nid);
-		if (wcaps & AC_WCAP_POWER) {
-			unsigned int wid_type = get_wcaps_type(wcaps);
-			if (power_state == AC_PWRST_D3 &&
-			    wid_type == AC_WID_PIN) {
-				unsigned int pincap;
-				/*
-				 * don't power down the widget if it controls
-				 * eapd and EAPD_BTLENABLE is set.
-				 */
-				pincap = snd_hda_query_pin_caps(codec, nid);
-				if (pincap & AC_PINCAP_EAPD) {
-					int eapd = snd_hda_codec_read(codec,
-						nid, 0,
+		if (!(wcaps & AC_WCAP_POWER))
+			continue;
+		/* don't power down the widget if it controls eapd and
+		 * EAPD_BTLENABLE is set.
+		 */
+		if (eapd_workaround && power_state == AC_PWRST_D3 &&
+		    get_wcaps_type(wcaps) == AC_WID_PIN &&
+		    (snd_hda_query_pin_caps(codec, nid) & AC_PINCAP_EAPD)) {
+			int eapd = snd_hda_codec_read(codec, nid, 0,
 						AC_VERB_GET_EAPD_BTLENABLE, 0);
-					eapd &= 0x02;
-					if (eapd)
-						continue;
-				}
-			}
-			snd_hda_codec_write(codec, nid, 0,
-					    AC_VERB_SET_POWER_STATE,
-					    power_state);
+			if (eapd & 0x02)
+				continue;
 		}
+		snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE,
+				    power_state);
 	}
 
 	if (power_state == AC_PWRST_D0) {
@@ -3262,6 +3243,26 @@
 		} while (time_after_eq(end_time, jiffies));
 	}
 }
+EXPORT_SYMBOL_HDA(snd_hda_codec_set_power_to_all);
+
+/*
+ * set power state of the codec
+ */
+static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
+				unsigned int power_state)
+{
+	if (codec->patch_ops.set_power_state) {
+		codec->patch_ops.set_power_state(codec, fg, power_state);
+		return;
+	}
+
+	/* this delay seems necessary to avoid click noise at power-down */
+	if (power_state == AC_PWRST_D3)
+		msleep(100);
+	snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+			    power_state);
+	snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
 
 #ifdef CONFIG_SND_HDA_HWDEP
 /* execute additional init verbs */
@@ -3274,7 +3275,7 @@
 static inline void hda_exec_init_verbs(struct hda_codec *codec) {}
 #endif
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /*
  * call suspend and power-down; used both from PM and power-save
  */
@@ -3315,7 +3316,7 @@
 		snd_hda_codec_resume_cache(codec);
 	}
 }
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
 
 /**
@@ -4071,9 +4072,6 @@
 EXPORT_SYMBOL_HDA(snd_hda_add_new_ctls);
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
-static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
-				unsigned int power_state);
-
 static void hda_power_work(struct work_struct *work)
 {
 	struct hda_codec *codec =
@@ -4376,11 +4374,8 @@
 	if (!bus)
 		return;
 	list_for_each_entry(codec, &bus->codec_list, list) {
-#ifdef CONFIG_SND_HDA_POWER_SAVE
-		if (!codec->power_on)
-			continue;
-#endif
-		if (codec->patch_ops.reboot_notify)
+		if (hda_codec_is_power_on(codec) &&
+		    codec->patch_ops.reboot_notify)
 			codec->patch_ops.reboot_notify(codec);
 	}
 }
@@ -5079,11 +5074,10 @@
 	struct hda_codec *codec;
 
 	list_for_each_entry(codec, &bus->codec_list, list) {
-#ifdef CONFIG_SND_HDA_POWER_SAVE
-		if (!codec->power_on)
-			continue;
-#endif
-		hda_call_codec_suspend(codec);
+		if (hda_codec_is_power_on(codec))
+			hda_call_codec_suspend(codec);
+		if (codec->patch_ops.post_suspend)
+			codec->patch_ops.post_suspend(codec);
 	}
 	return 0;
 }
@@ -5103,6 +5097,8 @@
 	struct hda_codec *codec;
 
 	list_for_each_entry(codec, &bus->codec_list, list) {
+		if (codec->patch_ops.pre_resume)
+			codec->patch_ops.pre_resume(codec);
 		if (snd_hda_codec_needs_resume(codec))
 			hda_call_codec_resume(codec);
 	}
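Two patterns recur in the hda_codec.c changes: hda_codec_is_power_on() expands to a real field test only when CONFIG_SND_HDA_POWER_SAVE is set and to a constant 1 otherwise, so callers can drop their #ifdef blocks; and hda_set_power_state() now defers to an optional patch_ops.set_power_state hook before falling back to the generic sequence. A sketch of the stub-macro half, under hypothetical config and field names:

#include <stdbool.h>
#include <stdio.h>

struct codec {
	bool power_on;
};

#ifdef CONFIG_FEATURE_POWER_SAVE
#define codec_is_power_on(c)	((c)->power_on)
#else
/* without power saving every codec counts as powered on, so the same
 * call site still compiles and the compiler folds the branch away */
#define codec_is_power_on(c)	1
#endif

static void reboot_notify(struct codec *c)
{
	if (codec_is_power_on(c))
		printf("notifying codec\n");	/* stand-in for real work */
}
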
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index f465e07..755f2b0 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -26,10 +26,6 @@
 #include <sound/pcm.h>
 #include <sound/hwdep.h>
 
-#if defined(CONFIG_PM) || defined(CONFIG_SND_HDA_POWER_SAVE)
-#define SND_HDA_NEEDS_RESUME	/* resume control code is required */
-#endif
-
 /*
  * nodes
  */
@@ -704,8 +700,12 @@
 	int (*init)(struct hda_codec *codec);
 	void (*free)(struct hda_codec *codec);
 	void (*unsol_event)(struct hda_codec *codec, unsigned int res);
-#ifdef SND_HDA_NEEDS_RESUME
+	void (*set_power_state)(struct hda_codec *codec, hda_nid_t fg,
+				unsigned int power_state);
+#ifdef CONFIG_PM
 	int (*suspend)(struct hda_codec *codec, pm_message_t state);
+	int (*post_suspend)(struct hda_codec *codec);
+	int (*pre_resume)(struct hda_codec *codec);
 	int (*resume)(struct hda_codec *codec);
 #endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -927,7 +927,7 @@
 int snd_hda_queue_unsol_event(struct hda_bus *bus, u32 res, u32 res_ex);
 
 /* cached write */
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid,
 			      int direct, unsigned int verb, unsigned int parm);
 void snd_hda_sequence_write_cache(struct hda_codec *codec,
@@ -1008,6 +1008,9 @@
  */
 void snd_hda_get_codec_name(struct hda_codec *codec, char *name, int namelen);
 void snd_hda_bus_reboot_notify(struct hda_bus *bus);
+void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg,
+				    unsigned int power_state,
+				    bool eapd_workaround);
 
 /*
  * power management
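The header now grows three optional hooks — set_power_state, post_suspend and pre_resume — that a codec fills in only when it must deviate from the generic PM flow; the core tests each pointer for NULL before calling it. A compact sketch of that optional-callback convention, with hypothetical types:

struct dev;

struct dev_ops {
	int (*suspend)(struct dev *d);		/* mandatory in this sketch */
	int (*post_suspend)(struct dev *d);	/* optional */
	int (*pre_resume)(struct dev *d);	/* optional */
};

struct dev {
	const struct dev_ops *ops;
};

static int dev_suspend(struct dev *d)
{
	int err = d->ops->suspend(d);

	/* optional stage: only devices that set the hook pay for it */
	if (!err && d->ops->post_suspend)
		err = d->ops->post_suspend(d);
	return err;
}
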
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 88b277e..2e7ac31 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -131,7 +131,7 @@
 			     int direction, int idx, int mask, int val);
 int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid,
 			     int dir, int idx, int mask, int val);
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 void snd_hda_codec_resume_amp(struct hda_codec *codec);
 #endif
 
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 1362c8b..8648917 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -563,7 +563,7 @@
 	snd_hda_detach_beep_device(codec);
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int ad198x_suspend(struct hda_codec *codec, pm_message_t state)
 {
 	ad198x_shutup(codec);
@@ -579,7 +579,7 @@
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	.check_power_status = ad198x_check_power_status,
 #endif
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 	.suspend = ad198x_suspend,
 #endif
 	.reboot_notify = ad198x_shutup,
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 7f93739..47d6ffc 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -25,6 +25,7 @@
 #include <sound/core.h>
 #include "hda_codec.h"
 #include "hda_local.h"
+#include <sound/tlv.h>
 
 /*
  */
@@ -61,9 +62,15 @@
 
 	unsigned int hp_detect:1;
 	unsigned int mic_detect:1;
+	/* CS421x */
+	unsigned int spdif_detect:1;
+	unsigned int sense_b:1;
+	hda_nid_t vendor_nid;
+	struct hda_input_mux input_mux;
+	unsigned int last_input;
 };
 
-/* available models */
+/* available models with CS420x */
 enum {
 	CS420X_MBP53,
 	CS420X_MBP55,
@@ -72,6 +79,12 @@
 	CS420X_MODELS
 };
 
+/* CS421x boards */
+enum {
+	CS421X_CDB4210,
+	CS421X_MODELS
+};
+
 /* Vendor-specific processing widget */
 #define CS420X_VENDOR_NID	0x11
 #define CS_DIG_OUT1_PIN_NID	0x10
@@ -111,21 +124,42 @@
 /* 0x0009 - 0x0014 -> 12 test regs */
 /* 0x0015 - visibility reg */
 
+/*
+ * Cirrus Logic CS4210
+ *
+ * 1 DAC => HP(sense) / Speakers,
+ * 1 ADC <= LineIn(sense) / MicIn / DMicIn,
+ * 1 SPDIF OUT => SPDIF Transmitter(sense)
+ */
+#define CS4210_DAC_NID		0x02
+#define CS4210_ADC_NID		0x03
+#define CS421X_VENDOR_NID	0x0B
+#define CS421X_DMIC_PIN_NID	0x09 /* Port E */
+#define CS421X_SPDIF_PIN_NID	0x0A /* Port H */
+
+#define CS421X_IDX_DEV_CFG	0x01
+#define CS421X_IDX_ADC_CFG	0x02
+#define CS421X_IDX_DAC_CFG	0x03
+#define CS421X_IDX_SPK_CTL	0x04
+
+#define SPDIF_EVENT		0x04
 
 static inline int cs_vendor_coef_get(struct hda_codec *codec, unsigned int idx)
 {
-	snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+	struct cs_spec *spec = codec->spec;
+	snd_hda_codec_write(codec, spec->vendor_nid, 0,
 			    AC_VERB_SET_COEF_INDEX, idx);
-	return snd_hda_codec_read(codec, CS420X_VENDOR_NID, 0,
+	return snd_hda_codec_read(codec, spec->vendor_nid, 0,
 				  AC_VERB_GET_PROC_COEF, 0);
 }
 
 static inline void cs_vendor_coef_set(struct hda_codec *codec, unsigned int idx,
 				      unsigned int coef)
 {
-	snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+	struct cs_spec *spec = codec->spec;
+	snd_hda_codec_write(codec, spec->vendor_nid, 0,
 			    AC_VERB_SET_COEF_INDEX, idx);
-	snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+	snd_hda_codec_write(codec, spec->vendor_nid, 0,
 			    AC_VERB_SET_PROC_COEF, coef);
 }
 
@@ -347,15 +381,12 @@
 	nid = codec->start_nid;
 	for (i = 0; i < codec->num_nodes; i++, nid++) {
 		unsigned int type;
-		int idx;
 		type = get_wcaps_type(get_wcaps(codec, nid));
 		if (type != AC_WID_AUD_IN)
 			continue;
-		idx = snd_hda_get_conn_index(codec, nid, pin, 0);
-		if (idx >= 0) {
-			*idxp = idx;
+		*idxp = snd_hda_get_conn_index(codec, nid, pin, false);
+		if (*idxp >= 0)
 			return nid;
-		}
 	}
 	return 0;
 }
@@ -835,6 +866,8 @@
 
 /*
  * auto-mute and auto-mic switching
+ * CS421x auto-output redirecting
+ * HP/SPK/SPDIF
  */
 
 static void cs_automute(struct hda_codec *codec)
@@ -842,9 +875,25 @@
 	struct cs_spec *spec = codec->spec;
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	unsigned int hp_present;
+	unsigned int spdif_present;
 	hda_nid_t nid;
 	int i;
 
+	spdif_present = 0;
+	if (cfg->dig_outs) {
+		nid = cfg->dig_out_pins[0];
+		if (is_jack_detectable(codec, nid)) {
+			/*
+			 * TODO: SPDIF output redirect when SENSE_B is
+			 * enabled.  A shared (SENSE_A) jack (e.g. an
+			 * HP/mini-TOSLINK combo) is assumed.
+			 */
+			if (snd_hda_jack_detect(codec, nid)
+				/* && spec->sense_b */)
+				spdif_present = 1;
+		}
+	}
+
 	hp_present = 0;
 	for (i = 0; i < cfg->hp_outs; i++) {
 		nid = cfg->hp_pins[i];
@@ -854,11 +903,19 @@
 		if (hp_present)
 			break;
 	}
+
+	/* mute speakers if spdif or hp jack is plugged in */
 	for (i = 0; i < cfg->speaker_outs; i++) {
 		nid = cfg->speaker_pins[i];
 		snd_hda_codec_write(codec, nid, 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL,
 				    hp_present ? 0 : PIN_OUT);
+		/* detect on spdif is specific to CS421x */
+		if (spec->vendor_nid == CS421X_VENDOR_NID) {
+			snd_hda_codec_write(codec, nid, 0,
+					AC_VERB_SET_PIN_WIDGET_CONTROL,
+					spdif_present ? 0 : PIN_OUT);
+		}
 	}
 	if (spec->board_config == CS420X_MBP53 ||
 	    spec->board_config == CS420X_MBP55 ||
@@ -867,21 +924,62 @@
 		snd_hda_codec_write(codec, 0x01, 0,
 				    AC_VERB_SET_GPIO_DATA, gpio);
 	}
+
+	/* specific to CS421x */
+	if (spec->vendor_nid == CS421X_VENDOR_NID) {
+		/* mute HPs if spdif jack (SENSE_B) is present */
+		for (i = 0; i < cfg->hp_outs; i++) {
+			nid = cfg->hp_pins[i];
+			snd_hda_codec_write(codec, nid, 0,
+				AC_VERB_SET_PIN_WIDGET_CONTROL,
+				(spdif_present && spec->sense_b) ? 0 : PIN_HP);
+		}
+
+		/* SPDIF TX on/off */
+		if (cfg->dig_outs) {
+			nid = cfg->dig_out_pins[0];
+			snd_hda_codec_write(codec, nid, 0,
+				AC_VERB_SET_PIN_WIDGET_CONTROL,
+				spdif_present ? PIN_OUT : 0);
+
+		}
+		/* Update board GPIOs if necessary ... */
+	}
 }
 
+/*
+ * Auto-input redirect for CS421x
+ * Switches between up to 3 inputs of a single ADC (nid 3)
+ */
+
 static void cs_automic(struct hda_codec *codec)
 {
 	struct cs_spec *spec = codec->spec;
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	hda_nid_t nid;
 	unsigned int present;
-	
+
 	nid = cfg->inputs[spec->automic_idx].pin;
 	present = snd_hda_jack_detect(codec, nid);
-	if (present)
-		change_cur_input(codec, spec->automic_idx, 0);
-	else
-		change_cur_input(codec, !spec->automic_idx, 0);
+
+	/* specific to CS421x, single ADC */
+	if (spec->vendor_nid == CS421X_VENDOR_NID) {
+		if (present) {
+			spec->last_input = spec->cur_input;
+			spec->cur_input = spec->automic_idx;
+		} else {
+			spec->cur_input = spec->last_input;
+		}
+
+		snd_hda_codec_write_cache(codec, spec->cur_adc, 0,
+					AC_VERB_SET_CONNECT_SEL,
+					spec->adc_idx[spec->cur_input]);
+	} else {
+		if (present)
+			change_cur_input(codec, spec->automic_idx, 0);
+		else
+			change_cur_input(codec, !spec->automic_idx, 0);
+	}
 }
 
 /*
@@ -911,23 +1009,28 @@
 	for (i = 0; i < cfg->line_outs; i++)
 		snd_hda_codec_write(codec, cfg->line_out_pins[i], 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
+	/* HP */
 	for (i = 0; i < cfg->hp_outs; i++) {
 		hda_nid_t nid = cfg->hp_pins[i];
 		snd_hda_codec_write(codec, nid, 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP);
 		if (!cfg->speaker_outs)
 			continue;
-		if (is_jack_detectable(codec, nid)) {
+		if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) {
 			snd_hda_codec_write(codec, nid, 0,
 					    AC_VERB_SET_UNSOLICITED_ENABLE,
 					    AC_USRSP_EN | HP_EVENT);
 			spec->hp_detect = 1;
 		}
 	}
+
+	/* Speaker */
 	for (i = 0; i < cfg->speaker_outs; i++)
 		snd_hda_codec_write(codec, cfg->speaker_pins[i], 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
-	if (spec->hp_detect)
+
+	/* SPDIF is enabled on presence detect for CS421x */
+	if (spec->hp_detect || spec->spdif_detect)
 		cs_automute(codec);
 }
 
@@ -961,19 +1064,31 @@
 					    AC_VERB_SET_UNSOLICITED_ENABLE,
 					    AC_USRSP_EN | MIC_EVENT);
 	}
-	change_cur_input(codec, spec->cur_input, 1);
-	if (spec->mic_detect)
-		cs_automic(codec);
+	/* specific to CS421x */
+	if (spec->vendor_nid == CS421X_VENDOR_NID) {
+		if (spec->mic_detect)
+			cs_automic(codec);
+		else {
+			spec->cur_adc = spec->adc_nid[spec->cur_input];
+			snd_hda_codec_write(codec, spec->cur_adc, 0,
+					AC_VERB_SET_CONNECT_SEL,
+					spec->adc_idx[spec->cur_input]);
+		}
+	} else {
+		change_cur_input(codec, spec->cur_input, 1);
+		if (spec->mic_detect)
+			cs_automic(codec);
 
-	coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
-	if (is_active_pin(codec, CS_DMIC2_PIN_NID))
-		coef |= 0x0500; /* DMIC2 enable 2 channels, disable GPIO1 */
-	if (is_active_pin(codec, CS_DMIC1_PIN_NID))
-		coef |= 0x1800; /* DMIC1 enable 2 channels, disable GPIO0 
-				 * No effect if SPDIF_OUT2 is selected in 
-				 * IDX_SPDIF_CTL.
-				  */
-	cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+		coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
+		if (is_active_pin(codec, CS_DMIC2_PIN_NID))
+			coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
+		if (is_active_pin(codec, CS_DMIC1_PIN_NID))
+			coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
+					 * No effect if SPDIF_OUT2 is
+					 * selected in IDX_SPDIF_CTL.
+					*/
+		cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+	}
 }
 
 static const struct hda_verb cs_coef_init_verbs[] = {
@@ -1221,16 +1336,16 @@
 	[CS420X_IMAC27] = imac27_pincfgs,
 };
 
-static void fix_pincfg(struct hda_codec *codec, int model)
+static void fix_pincfg(struct hda_codec *codec, int model,
+		       const struct cs_pincfg **pin_configs)
 {
-	const struct cs_pincfg *cfg = cs_pincfgs[model];
+	const struct cs_pincfg *cfg = pin_configs[model];
 	if (!cfg)
 		return;
 	for (; cfg->nid; cfg++)
 		snd_hda_codec_set_pincfg(codec, cfg->nid, cfg->val);
 }
 
-
 static int patch_cs420x(struct hda_codec *codec)
 {
 	struct cs_spec *spec;
@@ -1241,11 +1356,13 @@
 		return -ENOMEM;
 	codec->spec = spec;
 
+	spec->vendor_nid = CS420X_VENDOR_NID;
+
 	spec->board_config =
 		snd_hda_check_board_config(codec, CS420X_MODELS,
 					   cs420x_models, cs420x_cfg_tbl);
 	if (spec->board_config >= 0)
-		fix_pincfg(codec, spec->board_config);
+		fix_pincfg(codec, spec->board_config, cs_pincfgs);
 
 	switch (spec->board_config) {
 	case CS420X_IMAC27:
@@ -1272,6 +1389,562 @@
 	return err;
 }
 
+/*
+ * Cirrus Logic CS4210
+ *
+ * 1 DAC => HP(sense) / Speakers,
+ * 1 ADC <= LineIn(sense) / MicIn / DMicIn,
+ * 1 SPDIF OUT => SPDIF Transmitter(sense)
+ */
+
+/* CS4210 board names */
+static const char *cs421x_models[CS421X_MODELS] = {
+	[CS421X_CDB4210] = "cdb4210",
+};
+
+static const struct snd_pci_quirk cs421x_cfg_tbl[] = {
+	/* Test Intel board + CDB4210 */
+	SND_PCI_QUIRK(0x8086, 0x5001, "DP45SG/CDB4210", CS421X_CDB4210),
+	{} /* terminator */
+};
+
+/* CS4210 board pinconfigs */
+/* Default CS4210 (CDB4210) */
+static const struct cs_pincfg cdb4210_pincfgs[] = {
+	{ 0x05, 0x0321401f },
+	{ 0x06, 0x90170010 },
+	{ 0x07, 0x03813031 },
+	{ 0x08, 0xb7a70037 },
+	{ 0x09, 0xb7a6003e },
+	{ 0x0a, 0x034510f0 },
+	{} /* terminator */
+};
+
+static const struct cs_pincfg *cs421x_pincfgs[CS421X_MODELS] = {
+	[CS421X_CDB4210] = cdb4210_pincfgs,
+};
+
+static const struct hda_verb cs421x_coef_init_verbs[] = {
+	{0x0B, AC_VERB_SET_PROC_STATE, 1},
+	{0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DEV_CFG},
+	/*
+	    Disable Coefficient Index Auto-Increment (DAI)=1,
+	    PDREF=0
+	*/
+	{0x0B, AC_VERB_SET_PROC_COEF, 0x0001 },
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_ADC_CFG},
+	/* ADC SZCMode = Digital Soft Ramp */
+	{0x0B, AC_VERB_SET_PROC_COEF, 0x0002 },
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DAC_CFG},
+	{0x0B, AC_VERB_SET_PROC_COEF,
+	 (0x0002 /* DAC SZCMode = Digital Soft Ramp */
+	  | 0x0004 /* Mute DAC on FIFO error */
+	  | 0x0008 /* Enable DAC High Pass Filter */
+	  )},
+	{} /* terminator */
+};
+
+/* Errata: CS4210 rev A1 Silicon
+ *
+ * http://www.cirrus.com/en/pubs/errata/
+ *
+ * Description:
+ * 1. Performance degradation is present in the ADC.
+ * 2. Speaker output is not completely muted upon HP detect.
+ * 3. Noise is present when clipping occurs on the amplified
+ *    speaker outputs.
+ *
+ * Workaround:
+ * The following verb sequence written to the registers during
+ * initialization will correct the issues listed above.
+ */
+
+static const struct hda_verb cs421x_coef_init_verbs_A1_silicon_fixes[] = {
+	{0x0B, AC_VERB_SET_PROC_STATE, 0x01},  /* VPW: processing on */
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, 0x0006},
+	{0x0B, AC_VERB_SET_PROC_COEF, 0x9999}, /* Test mode: on */
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, 0x000A},
+	{0x0B, AC_VERB_SET_PROC_COEF, 0x14CB}, /* Chop double */
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, 0x0011},
+	{0x0B, AC_VERB_SET_PROC_COEF, 0xA2D0}, /* Increase ADC current */
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, 0x001A},
+	{0x0B, AC_VERB_SET_PROC_COEF, 0x02A9}, /* Mute speaker */
+
+	{0x0B, AC_VERB_SET_COEF_INDEX, 0x001B},
+	{0x0B, AC_VERB_SET_PROC_COEF, 0x1006}, /* Remove noise */
+
+	{} /* terminator */
+};
+
+/* Speaker Amp Gain is controlled by the vendor widget's coef 4 */
+static const DECLARE_TLV_DB_SCALE(cs421x_speaker_boost_db_scale, 900, 300, 0);
+
+static int cs421x_boost_vol_info(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 3;
+	return 0;
+}
+
+static int cs421x_boost_vol_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+
+	ucontrol->value.integer.value[0] =
+		cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL) & 0x0003;
+	return 0;
+}
+
+static int cs421x_boost_vol_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+
+	unsigned int vol = ucontrol->value.integer.value[0];
+	unsigned int coef =
+		cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL);
+	unsigned int original_coef = coef;
+
+	coef &= ~0x0003;
+	coef |= (vol & 0x0003);
+	if (original_coef == coef)
+		return 0;
+	else {
+		cs_vendor_coef_set(codec, CS421X_IDX_SPK_CTL, coef);
+		return 1;
+	}
+}
+
+static const struct snd_kcontrol_new cs421x_speaker_bost_ctl = {
+
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ),
+	.name = "Speaker Boost Playback Volume",
+	.info = cs421x_boost_vol_info,
+	.get = cs421x_boost_vol_get,
+	.put = cs421x_boost_vol_put,
+	.tlv = { .p = cs421x_speaker_boost_db_scale },
+};
+
+static void cs421x_pinmux_init(struct hda_codec *codec)
+{
+	struct cs_spec *spec = codec->spec;
+	unsigned int def_conf, coef;
+
+	/* GPIO, DMIC_SCL, DMIC_SDA and SENSE_B are multiplexed */
+	coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG);
+
+	if (spec->gpio_mask)
+		coef |= 0x0008; /* B1,B2 are GPIOs */
+	else
+		coef &= ~0x0008;
+
+	if (spec->sense_b)
+		coef |= 0x0010; /* B2 is SENSE_B, not inverted */
+	else
+		coef &= ~0x0010;
+
+	cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef);
+
+	if ((spec->gpio_mask || spec->sense_b) &&
+	    is_active_pin(codec, CS421X_DMIC_PIN_NID)) {
+
+		/*
+		    GPIO or SENSE_B forced - disconnect the DMIC pin.
+		*/
+		def_conf = snd_hda_codec_get_pincfg(codec, CS421X_DMIC_PIN_NID);
+		def_conf &= ~AC_DEFCFG_PORT_CONN;
+		def_conf |= (AC_JACK_PORT_NONE << AC_DEFCFG_PORT_CONN_SHIFT);
+		snd_hda_codec_set_pincfg(codec, CS421X_DMIC_PIN_NID, def_conf);
+	}
+}
+
+static void init_cs421x_digital(struct hda_codec *codec)
+{
+	struct cs_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i;
+
+
+	for (i = 0; i < cfg->dig_outs; i++) {
+		hda_nid_t nid = cfg->dig_out_pins[i];
+		if (!cfg->speaker_outs)
+			continue;
+		if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) {
+
+			snd_hda_codec_write(codec, nid, 0,
+				    AC_VERB_SET_UNSOLICITED_ENABLE,
+				    AC_USRSP_EN | SPDIF_EVENT);
+			spec->spdif_detect = 1;
+		}
+	}
+}
+
+static int cs421x_init(struct hda_codec *codec)
+{
+	struct cs_spec *spec = codec->spec;
+
+	snd_hda_sequence_write(codec, cs421x_coef_init_verbs);
+	snd_hda_sequence_write(codec, cs421x_coef_init_verbs_A1_silicon_fixes);
+
+	cs421x_pinmux_init(codec);
+
+	if (spec->gpio_mask) {
+		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK,
+				    spec->gpio_mask);
+		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION,
+				    spec->gpio_dir);
+		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
+				    spec->gpio_data);
+	}
+
+	init_output(codec);
+	init_input(codec);
+	init_cs421x_digital(codec);
+
+	return 0;
+}
+
+/*
+ * CS4210 Input MUX (1 ADC)
+ */
+static int cs421x_mux_enum_info(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_info *uinfo)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct cs_spec *spec = codec->spec;
+
+	return snd_hda_input_mux_info(&spec->input_mux, uinfo);
+}
+
+static int cs421x_mux_enum_get(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct cs_spec *spec = codec->spec;
+
+	ucontrol->value.enumerated.item[0] = spec->cur_input;
+	return 0;
+}
+
+static int cs421x_mux_enum_put(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct cs_spec *spec = codec->spec;
+
+	return snd_hda_input_mux_put(codec, &spec->input_mux, ucontrol,
+				spec->adc_nid[0], &spec->cur_input);
+
+}
+
+static struct snd_kcontrol_new cs421x_capture_source = {
+
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "Capture Source",
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.info = cs421x_mux_enum_info,
+	.get = cs421x_mux_enum_get,
+	.put = cs421x_mux_enum_put,
+};
+
+static int cs421x_add_input_volume_control(struct hda_codec *codec, int item)
+{
+	struct cs_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	const struct hda_input_mux *imux = &spec->input_mux;
+	hda_nid_t pin = cfg->inputs[item].pin;
+	struct snd_kcontrol *kctl;
+	u32 caps;
+
+	if (!(get_wcaps(codec, pin) & AC_WCAP_IN_AMP))
+		return 0;
+
+	caps = query_amp_caps(codec, pin, HDA_INPUT);
+	caps = (caps & AC_AMPCAP_NUM_STEPS) >> AC_AMPCAP_NUM_STEPS_SHIFT;
+	if (caps <= 1)
+		return 0;
+
+	return add_volume(codec,  imux->items[item].label, 0,
+			  HDA_COMPOSE_AMP_VAL(pin, 3, 0, HDA_INPUT), 1, &kctl);
+}
+
+/* add an (input-boost) volume control to the given input pin */
+static int build_cs421x_input(struct hda_codec *codec)
+{
+	struct cs_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	struct hda_input_mux *imux = &spec->input_mux;
+	int i, err, type_idx;
+	const char *label;
+
+	if (!spec->num_inputs)
+		return 0;
+
+	/* make bind-capture */
+	spec->capture_bind[0] = make_bind_capture(codec, &snd_hda_bind_sw);
+	spec->capture_bind[1] = make_bind_capture(codec, &snd_hda_bind_vol);
+	for (i = 0; i < 2; i++) {
+		struct snd_kcontrol *kctl;
+		int n;
+		if (!spec->capture_bind[i])
+			return -ENOMEM;
+		kctl = snd_ctl_new1(&cs_capture_ctls[i], codec);
+		if (!kctl)
+			return -ENOMEM;
+		kctl->private_value = (long)spec->capture_bind[i];
+		err = snd_hda_ctl_add(codec, 0, kctl);
+		if (err < 0)
+			return err;
+		for (n = 0; n < AUTO_PIN_LAST; n++) {
+			if (!spec->adc_nid[n])
+				continue;
+			err = snd_hda_add_nid(codec, kctl, 0, spec->adc_nid[n]);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	/* Add Input MUX Items + Capture Volume/Switch */
+	for (i = 0; i < spec->num_inputs; i++) {
+		label = hda_get_autocfg_input_label(codec, cfg, i);
+		snd_hda_add_imux_item(imux, label, spec->adc_idx[i], &type_idx);
+
+		err = cs421x_add_input_volume_control(codec, i);
+		if (err < 0)
+			return err;
+	}
+
+	/*
+	    Add 'Capture Source' Switch if
+		* 2 inputs and no mic detect
+		* 3 inputs
+	*/
+	if ((spec->num_inputs == 2 && !spec->mic_detect) ||
+	    (spec->num_inputs == 3)) {
+
+		err = snd_hda_ctl_add(codec, spec->adc_nid[0],
+			      snd_ctl_new1(&cs421x_capture_source, codec));
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Single DAC (Mute/Gain) */
+static int build_cs421x_output(struct hda_codec *codec)
+{
+	hda_nid_t dac = CS4210_DAC_NID;
+	struct cs_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	struct snd_kcontrol *kctl;
+	int err;
+	char *name = "HP/Speakers";
+
+	fix_volume_caps(codec, dac);
+	if (!spec->vmaster_sw) {
+		err = add_vmaster(codec, dac);
+		if (err < 0)
+			return err;
+	}
+
+	err = add_mute(codec, name, 0,
+			HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl);
+	if (err < 0)
+		return err;
+	err = snd_ctl_add_slave(spec->vmaster_sw, kctl);
+	if (err < 0)
+		return err;
+
+	err = add_volume(codec, name, 0,
+			HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl);
+	if (err < 0)
+		return err;
+	err = snd_ctl_add_slave(spec->vmaster_vol, kctl);
+	if (err < 0)
+		return err;
+
+	if (cfg->speaker_outs) {
+		err = snd_hda_ctl_add(codec, 0,
+			snd_ctl_new1(&cs421x_speaker_bost_ctl, codec));
+		if (err < 0)
+			return err;
+	}
+	return err;
+}
+
+static int cs421x_build_controls(struct hda_codec *codec)
+{
+	int err;
+
+	err = build_cs421x_output(codec);
+	if (err < 0)
+		return err;
+	err = build_cs421x_input(codec);
+	if (err < 0)
+		return err;
+	err = build_digital_output(codec);
+	if (err < 0)
+		return err;
+	return cs421x_init(codec);
+}
+
+static void cs421x_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+	switch ((res >> 26) & 0x3f) {
+	case HP_EVENT:
+	case SPDIF_EVENT:
+		cs_automute(codec);
+		break;
+
+	case MIC_EVENT:
+		cs_automic(codec);
+		break;
+	}
+}
+
+static int parse_cs421x_input(struct hda_codec *codec)
+{
+	struct cs_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i;
+
+	for (i = 0; i < cfg->num_inputs; i++) {
+		hda_nid_t pin = cfg->inputs[i].pin;
+		spec->adc_nid[i] = get_adc(codec, pin, &spec->adc_idx[i]);
+		spec->cur_input = spec->last_input = i;
+		spec->num_inputs++;
+
+		/* check whether the automatic mic switch is available */
+		if (is_ext_mic(codec, i) && cfg->num_inputs >= 2) {
+			spec->mic_detect = 1;
+			spec->automic_idx = i;
+		}
+	}
+	return 0;
+}
+
+static int cs421x_parse_auto_config(struct hda_codec *codec)
+{
+	struct cs_spec *spec = codec->spec;
+	int err;
+
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
+	if (err < 0)
+		return err;
+	err = parse_output(codec);
+	if (err < 0)
+		return err;
+	err = parse_cs421x_input(codec);
+	if (err < 0)
+		return err;
+	err = parse_digital_output(codec);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+	Manage PDREF when transitioning to D3hot:
+	(DAC,ADC) -> D3, PDREF=1, AFG->D3
+*/
+static int cs421x_suspend(struct hda_codec *codec, pm_message_t state)
+{
+	unsigned int coef;
+
+	snd_hda_shutup_pins(codec);
+
+	snd_hda_codec_write(codec, CS4210_DAC_NID, 0,
+			    AC_VERB_SET_POWER_STATE,  AC_PWRST_D3);
+	snd_hda_codec_write(codec, CS4210_ADC_NID, 0,
+			    AC_VERB_SET_POWER_STATE,  AC_PWRST_D3);
+
+	coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG);
+	coef |= 0x0004; /* PDREF */
+	cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef);
+
+	return 0;
+}
+#endif
+
+static struct hda_codec_ops cs4210_patch_ops = {
+	.build_controls = cs421x_build_controls,
+	.build_pcms = cs_build_pcms,
+	.init = cs421x_init,
+	.free = cs_free,
+	.unsol_event = cs421x_unsol_event,
+#ifdef CONFIG_PM
+	.suspend = cs421x_suspend,
+#endif
+};
+
+static int patch_cs421x(struct hda_codec *codec)
+{
+	struct cs_spec *spec;
+	int err;
+
+	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+	codec->spec = spec;
+
+	spec->vendor_nid = CS421X_VENDOR_NID;
+
+	spec->board_config =
+		snd_hda_check_board_config(codec, CS421X_MODELS,
+					   cs421x_models, cs421x_cfg_tbl);
+	if (spec->board_config >= 0)
+		fix_pincfg(codec, spec->board_config, cs421x_pincfgs);
+	/*
+	    Setup GPIO/SENSE for each board (if used)
+	*/
+	switch (spec->board_config) {
+	case CS421X_CDB4210:
+		snd_printd("CS4210 board: %s\n",
+			cs421x_models[spec->board_config]);
+/*		spec->gpio_mask = 3;
+		spec->gpio_dir = 3;
+		spec->gpio_data = 3;
+*/
+		spec->sense_b = 1;
+
+		break;
+	}
+
+	/*
+	    Update the GPIO/DMIC/SENSE_B pinmux before the configuration
+	    is auto-parsed. If GPIO or SENSE_B is forced, DMIC input
+	    is disabled.
+	*/
+	cs421x_pinmux_init(codec);
+
+	err = cs421x_parse_auto_config(codec);
+	if (err < 0)
+		goto error;
+
+	codec->patch_ops = cs4210_patch_ops;
+
+	return 0;
+
+ error:
+	kfree(codec->spec);
+	codec->spec = NULL;
+	return err;
+}
+
 
 /*
  * patch entries
@@ -1279,11 +1952,13 @@
 static const struct hda_codec_preset snd_hda_preset_cirrus[] = {
 	{ .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x },
 	{ .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x },
+	{ .id = 0x10134210, .name = "CS4210", .patch = patch_cs421x },
 	{} /* terminator */
 };
 
 MODULE_ALIAS("snd-hda-codec-id:10134206");
 MODULE_ALIAS("snd-hda-codec-id:10134207");
+MODULE_ALIAS("snd-hda-codec-id:10134210");
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cirrus Logic HD-audio codec");
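cs421x_boost_vol_put() in the patch above follows the usual ALSA put-handler contract: read the current coefficient, modify only the bits the control owns, skip the hardware write when nothing changed, and return 1 only on an actual change so notification events fire correctly. A reduced sketch with stand-in coefficient accessors in place of the vendor-widget verbs:

static unsigned int coef_cache = 0x0008;	/* stand-in for hw state */

static unsigned int coef_get(void)	{ return coef_cache; }
static void coef_set(unsigned int v)	{ coef_cache = v; }

static int boost_put(unsigned int vol)
{
	unsigned int coef = coef_get();
	unsigned int new_coef = (coef & ~0x0003u) | (vol & 0x0003);

	if (new_coef == coef)
		return 0;	/* nothing changed: no write, no event */
	coef_set(new_coef);
	return 1;		/* changed: caller emits a notification */
}
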
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 884f67b..502fc94 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -446,6 +446,19 @@
 	return 0;
 }
 
+static void conexant_set_power(struct hda_codec *codec, hda_nid_t fg,
+			       unsigned int power_state)
+{
+	if (power_state == AC_PWRST_D3)
+		msleep(100);
+	snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+			    power_state);
+	/* partial workaround for "azx_get_response timeout" */
+	if (power_state == AC_PWRST_D0)
+		msleep(10);
+	snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
+
 static int conexant_init(struct hda_codec *codec)
 {
 	struct conexant_spec *spec = codec->spec;
@@ -588,6 +601,7 @@
 	.build_pcms = conexant_build_pcms,
 	.init = conexant_init,
 	.free = conexant_free,
+	.set_power_state = conexant_set_power,
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	.suspend = conexant_suspend,
 #endif
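conexant_set_power() keeps the codec-specific timing that the generic path no longer hardcodes: settle before entering D3 to avoid an audible click, then give the part a short grace period after D0 before further verbs are issued. The ordering, sketched with stand-in helpers rather than the real msleep()/verb calls:

enum pwr { PWR_D0, PWR_D3 };

static void delay_ms(unsigned int ms)	{ (void)ms; /* stand-in for msleep() */ }
static void power_verb(enum pwr p)	{ (void)p;  /* stand-in for the verb */ }

static void set_power(enum pwr p)
{
	if (p == PWR_D3)
		delay_ms(100);	/* settle first: avoids a click at power-down */
	power_verb(p);
	if (p == PWR_D0)
		delay_ms(10);	/* grace period: works around slow wake-up */
}
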
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 52ce075..e125c60 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -895,13 +895,15 @@
 	if (present == 3)
 		spec->automute_hp_lo = 1; /* both HP and LO automute */
 
-	if (!cfg->speaker_pins[0]) {
+	if (!cfg->speaker_pins[0] &&
+	    cfg->line_out_type == AUTO_PIN_SPEAKER_OUT) {
 		memcpy(cfg->speaker_pins, cfg->line_out_pins,
 		       sizeof(cfg->speaker_pins));
 		cfg->speaker_outs = cfg->line_outs;
 	}
 
-	if (!cfg->hp_pins[0]) {
+	if (!cfg->hp_pins[0] &&
+	    cfg->line_out_type == AUTO_PIN_HP_OUT) {
 		memcpy(cfg->hp_pins, cfg->line_out_pins,
 		       sizeof(cfg->hp_pins));
 		cfg->hp_outs = cfg->line_outs;
@@ -920,6 +922,7 @@
 		spec->automute_mode = ALC_AUTOMUTE_PIN;
 	}
 	if (spec->automute && cfg->line_out_pins[0] &&
+	    cfg->speaker_pins[0] &&
 	    cfg->line_out_pins[0] != cfg->hp_pins[0] &&
 	    cfg->line_out_pins[0] != cfg->speaker_pins[0]) {
 		for (i = 0; i < cfg->line_outs; i++) {
@@ -1911,7 +1914,7 @@
 				return err;
 		}
 	}
-	if (spec->cap_mixer) {
+	if (spec->cap_mixer && spec->adc_nids) {
 		const char *kname = kctl ? kctl->id.name : NULL;
 		for (knew = spec->cap_mixer; knew->name; knew++) {
 			if (kname && strcmp(knew->name, kname) == 0)
@@ -2386,7 +2389,7 @@
 }
 #endif
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int alc_resume(struct hda_codec *codec)
 {
 	msleep(150); /* to avoid pop noise */
@@ -2406,7 +2409,7 @@
 	.init = alc_init,
 	.free = alc_free,
 	.unsol_event = alc_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 	.resume = alc_resume,
 #endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -2801,7 +2804,8 @@
 	int i;
 
  again:
-	spec->multiout.num_dacs = 0;
+	/* set num_dacs once to full for alc_auto_look_for_dac() */
+	spec->multiout.num_dacs = cfg->line_outs;
 	spec->multiout.hp_nid = 0;
 	spec->multiout.extra_out_nid[0] = 0;
 	memset(spec->private_dac_nids, 0, sizeof(spec->private_dac_nids));
@@ -2834,6 +2838,8 @@
 		}
 	}
 
+	/* re-count num_dacs and squash invalid entries */
+	spec->multiout.num_dacs = 0;
 	for (i = 0; i < cfg->line_outs; i++) {
 		if (spec->private_dac_nids[i])
 			spec->multiout.num_dacs++;
@@ -3674,7 +3680,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc880_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -3801,7 +3807,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc260_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -3980,7 +3986,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc882_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -4134,7 +4140,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc262_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -4290,7 +4296,7 @@
 					  (0 << AC_AMPCAP_MUTE_SHIFT));
 	}
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -4410,7 +4416,7 @@
 	}
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int alc269_resume(struct hda_codec *codec)
 {
 	if ((alc_read_coef_idx(codec, 0) & 0x00ff) == 0x018) {
@@ -4433,7 +4439,7 @@
 	hda_call_check_power_status(codec, 0x01);
 	return 0;
 }
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
 static void alc269_fixup_hweq(struct hda_codec *codec,
 			       const struct alc_fixup *fix, int action)
@@ -4702,7 +4708,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc269_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -4725,7 +4731,7 @@
 	spec->vmaster_nid = 0x02;
 
 	codec->patch_ops = alc_patch_ops;
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 	codec->patch_ops.resume = alc269_resume;
 #endif
 	if (board_config == ALC_MODEL_AUTO)
@@ -4840,7 +4846,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc861_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -4981,7 +4987,7 @@
 		add_verb(spec, alc660vd_eapd_verbs);
 	}
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -5197,7 +5203,7 @@
 	if (board_config != ALC_MODEL_AUTO)
 		setup_preset(codec, &alc662_presets[board_config]);
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
@@ -5333,7 +5339,7 @@
 #endif
 	}
 
-	if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+	if (!spec->no_analog && !spec->adc_nids) {
 		alc_auto_fill_adc_caps(codec);
 		alc_rebuild_imux_for_auto_mic(codec);
 		alc_remove_invalid_adc_nids(codec);
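The first patch_realtek.c hunk tightens the output-fallback logic: line-out pins are copied into the speaker (or headphone) arrays only when the parser actually classified the line-outs as that type, which stops HP-only configurations from acquiring phantom speakers. Roughly, under a hypothetical config struct:

#include <string.h>

enum out_type { OUT_SPEAKER, OUT_HP, OUT_LINE };

struct out_cfg {
	enum out_type line_out_type;
	int line_out_pins[5], speaker_pins[5], hp_pins[5];
	int line_outs, speaker_outs, hp_outs;
};

static void fixup_outputs(struct out_cfg *cfg)
{
	/* reuse line-outs as speakers only if they really are speakers */
	if (!cfg->speaker_pins[0] && cfg->line_out_type == OUT_SPEAKER) {
		memcpy(cfg->speaker_pins, cfg->line_out_pins,
		       sizeof(cfg->speaker_pins));
		cfg->speaker_outs = cfg->line_outs;
	}
	/* and as headphones only if they really are headphones */
	if (!cfg->hp_pins[0] && cfg->line_out_type == OUT_HP) {
		memcpy(cfg->hp_pins, cfg->line_out_pins,
		       sizeof(cfg->hp_pins));
		cfg->hp_outs = cfg->line_outs;
	}
}
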
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 56425a5..aa376b5 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -95,6 +95,7 @@
 	STAC_92HD83XXX_PWR_REF,
 	STAC_DELL_S14,
 	STAC_92HD83XXX_HP,
+	STAC_92HD83XXX_HP_cNB11_INTQUAD,
 	STAC_HP_DV7_4000,
 	STAC_92HD83XXX_MODELS
 };
@@ -212,6 +213,7 @@
 	unsigned int gpio_mute;
 	unsigned int gpio_led;
 	unsigned int gpio_led_polarity;
+	unsigned int vref_led;
 
 	/* stream */
 	unsigned int stream_delay;
@@ -671,6 +673,30 @@
 	return 0;
 }
 
+static int stac_vrefout_set(struct hda_codec *codec,
+					hda_nid_t nid, unsigned int new_vref)
+{
+	int error, pinctl;
+
+	snd_printdd("%s, nid %x ctl %x\n", __func__, nid, new_vref);
+	pinctl = snd_hda_codec_read(codec, nid, 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+
+	if (pinctl < 0)
+		return pinctl;
+
+	pinctl &= 0xff;
+	pinctl &= ~AC_PINCTL_VREFEN;
+	pinctl |= (new_vref & AC_PINCTL_VREFEN);
+
+	error = snd_hda_codec_write_cache(codec, nid, 0,
+					AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl);
+	if (error < 0)
+		return error;
+
+	return 1;
+}
+
 static unsigned int stac92xx_vref_set(struct hda_codec *codec,
 					hda_nid_t nid, unsigned int new_vref)
 {
@@ -1636,10 +1662,17 @@
 	0x40f000f0, 0x40f000f0,
 };
 
+static const unsigned int hp_cNB11_intquad_pin_configs[10] = {
+	0x40f000f0, 0x0221101f, 0x02a11020, 0x92170110,
+	0x40f000f0, 0x92170110, 0x40f000f0, 0xd5a30130,
+	0x40f000f0, 0x40f000f0,
+};
+
 static const unsigned int *stac92hd83xxx_brd_tbl[STAC_92HD83XXX_MODELS] = {
 	[STAC_92HD83XXX_REF] = ref92hd83xxx_pin_configs,
 	[STAC_92HD83XXX_PWR_REF] = ref92hd83xxx_pin_configs,
 	[STAC_DELL_S14] = dell_s14_pin_configs,
+	[STAC_92HD83XXX_HP_cNB11_INTQUAD] = hp_cNB11_intquad_pin_configs,
 	[STAC_HP_DV7_4000] = hp_dv7_4000_pin_configs,
 };
 
@@ -1649,6 +1682,7 @@
 	[STAC_92HD83XXX_PWR_REF] = "mic-ref",
 	[STAC_DELL_S14] = "dell-s14",
 	[STAC_92HD83XXX_HP] = "hp",
+	[STAC_92HD83XXX_HP_cNB11_INTQUAD] = "hp_cNB11_intquad",
 	[STAC_HP_DV7_4000] = "hp-dv7-4000",
 };
 
@@ -1661,7 +1695,47 @@
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02ba,
 		      "unknown Dell", STAC_DELL_S14),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x3600,
-		      "HP", STAC_92HD83XXX_HP),
+			  "HP", STAC_92HD83XXX_HP),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1656,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1657,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1658,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1659,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165A,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165B,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3388,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3389,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355B,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355C,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355D,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355E,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355F,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3560,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358B,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358C,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358D,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3591,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3592,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3593,
+			  "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
 	{} /* terminator */
 };
 
@@ -4020,6 +4094,8 @@
 {
 	unsigned int gpiostate, gpiomask, gpiodir;
 
+	snd_printdd("%s msk %x dir %x gpio %x\n", __func__, mask, dir_mask, data);
+
 	gpiostate = snd_hda_codec_read(codec, codec->afg, 0,
 				       AC_VERB_GET_GPIO_DATA, 0);
 	gpiostate = (gpiostate & ~dir_mask) | (data & dir_mask);
@@ -4209,10 +4285,12 @@
 		spec->eapd_switch = val;
 	get_int_hint(codec, "gpio_led_polarity", &spec->gpio_led_polarity);
 	if (get_int_hint(codec, "gpio_led", &spec->gpio_led)) {
-		spec->gpio_mask |= spec->gpio_led;
-		spec->gpio_dir |= spec->gpio_led;
-		if (spec->gpio_led_polarity)
-			spec->gpio_data |= spec->gpio_led;
+		if (spec->gpio_led <= 8) {
+			spec->gpio_mask |= spec->gpio_led;
+			spec->gpio_dir |= spec->gpio_led;
+			if (spec->gpio_led_polarity)
+				spec->gpio_data |= spec->gpio_led;
+		}
 	}
 }
 
@@ -4382,11 +4460,26 @@
 	snd_array_free(&spec->kctls);
 }
 
+static void stac92xx_shutup_pins(struct hda_codec *codec)
+{
+	unsigned int i, def_conf;
+
+	if (codec->bus->shutdown)
+		return;
+	for (i = 0; i < codec->init_pins.used; i++) {
+		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+		def_conf = snd_hda_codec_get_pincfg(codec, pin->nid);
+		if (get_defcfg_connect(def_conf) != AC_JACK_PORT_NONE)
+			snd_hda_codec_write(codec, pin->nid, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL, 0);
+	}
+}
+
 static void stac92xx_shutup(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
 
-	snd_hda_shutup_pins(codec);
+	stac92xx_shutup_pins(codec);
 
 	if (spec->eapd_mask)
 		stac_gpio_set(codec, spec->gpio_mask,
@@ -4784,10 +4877,11 @@
 	if ((codec->subsystem_id >> 16) == PCI_VENDOR_ID_HP) {
 		while ((dev = dmi_find_device(DMI_DEV_TYPE_OEM_STRING,
 								NULL, dev))) {
-			if (sscanf(dev->name, "HP_Mute_LED_%d_%d",
+			if (sscanf(dev->name, "HP_Mute_LED_%d_%x",
 				  &spec->gpio_led_polarity,
 				  &spec->gpio_led) == 2) {
-				spec->gpio_led = 1 << spec->gpio_led;
+				if (spec->gpio_led < 4)
+					spec->gpio_led = 1 << spec->gpio_led;
 				return 1;
 			}
 			if (sscanf(dev->name, "HP_Mute_LED_%d",
@@ -4885,7 +4979,7 @@
 #define stac927x_proc_hook	NULL
 #endif
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int stac92xx_resume(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -4901,29 +4995,81 @@
 			stac_issue_unsol_event(codec,
 					       spec->autocfg.line_out_pins[0]);
 	}
-	/* sync mute LED */
-	if (spec->gpio_led)
-		hda_call_check_power_status(codec, 0x01);
 	return 0;
 }
 
-/*
- * using power check for controlling mute led of HP notebooks
- * check for mute state only on Speakers (nid = 0x10)
- *
- * For this feature CONFIG_SND_HDA_POWER_SAVE is needed, otherwise
- * the LED is NOT working properly !
- *
- * Changed name to reflect that it now works for any designated
- * model, not just HP HDX.
- */
+static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state)
+{
+	stac92xx_shutup(codec);
+	return 0;
+}
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
-static int stac92xx_hp_check_power_status(struct hda_codec *codec,
-					      hda_nid_t nid)
+static int stac92xx_pre_resume(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	int i, muted = 1;
+
+	/* sync mute LED */
+	if (spec->gpio_led) {
+		if (spec->gpio_led <= 8) {
+			stac_gpio_set(codec, spec->gpio_mask,
+					spec->gpio_dir, spec->gpio_data);
+		} else {
+			stac_vrefout_set(codec,
+					spec->gpio_led, spec->vref_led);
+		}
+	}
+	return 0;
+}
+
+static int stac92xx_post_suspend(struct hda_codec *codec)
+{
+	struct sigmatel_spec *spec = codec->spec;
+	if (spec->gpio_led > 8) {
+		/* when a vref-out pin is used for mute LED control,
+		 * the codec AFG is normally kept out of D3; on full
+		 * system suspend, though, it can (and should) enter D3
+		 */
+		snd_hda_codec_read(codec, codec->afg, 0,
+				AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+	}
+	return 0;
+}
+
+static void stac92xx_set_power_state(struct hda_codec *codec, hda_nid_t fg,
+				unsigned int power_state)
+{
+	unsigned int afg_power_state = power_state;
+	struct sigmatel_spec *spec = codec->spec;
+
+	if (power_state == AC_PWRST_D3) {
+		if (spec->gpio_led > 8) {
+			/* when a vref-out pin is used for mute LED
+			 * control, the codec AFG must stay out of D3
+			 */
+			afg_power_state = AC_PWRST_D1;
+		}
+		/* this delay seems necessary to avoid click noise at power-down */
+		msleep(100);
+	}
+	snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+			afg_power_state);
+	snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
+
+/*
+ * This feature requires CONFIG_SND_HDA_POWER_SAVE, since the mute
+ * LED state is updated from the check_power_status hook.
+ */
+static int stac92xx_update_led_status(struct hda_codec *codec)
+{
+	struct sigmatel_spec *spec = codec->spec;
+	int i, num_ext_dacs, muted = 1;
+	unsigned int muted_lvl, notmtd_lvl;
+	hda_nid_t nid;
+
+	if (!spec->gpio_led)
+		return 0;
 
 	for (i = 0; i < spec->multiout.num_dacs; i++) {
 		nid = spec->multiout.dac_nids[i];
@@ -4933,27 +5079,58 @@
 			break;
 		}
 	}
-	if (muted)
-		spec->gpio_data &= ~spec->gpio_led; /* orange */
-	else
-		spec->gpio_data |= spec->gpio_led; /* white */
-
-	if (!spec->gpio_led_polarity) {
-		/* LED state is inverted on these systems */
-		spec->gpio_data ^= spec->gpio_led;
+	if (muted && spec->multiout.hp_nid)
+		if (!(snd_hda_codec_amp_read(codec,
+				spec->multiout.hp_nid, 0, HDA_OUTPUT, 0) &
+					HDA_AMP_MUTE)) {
+			muted = 0; /* HP is not muted */
+		}
+	num_ext_dacs = ARRAY_SIZE(spec->multiout.extra_out_nid);
+	for (i = 0; muted && i < num_ext_dacs; i++) {
+		nid = spec->multiout.extra_out_nid[i];
+		if (nid == 0)
+			break;
+		if (!(snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
+		      HDA_AMP_MUTE)) {
+			muted = 0; /* extra output is not muted */
+		}
 	}
+	/* polarity defines the "not muted" state level */
+	if (spec->gpio_led <= 8) {
+		if (muted)
+			spec->gpio_data &= ~spec->gpio_led; /* orange */
+		else
+			spec->gpio_data |= spec->gpio_led; /* white */
 
-	stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data);
+		if (!spec->gpio_led_polarity) {
+			/* LED state is inverted on these systems */
+			spec->gpio_data ^= spec->gpio_led;
+		}
+		stac_gpio_set(codec, spec->gpio_mask,
+				spec->gpio_dir, spec->gpio_data);
+	} else {
+		notmtd_lvl = spec->gpio_led_polarity ?
+				AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_GRD;
+		muted_lvl = spec->gpio_led_polarity ?
+				AC_PINCTL_VREF_GRD : AC_PINCTL_VREF_HIZ;
+		spec->vref_led = muted ? muted_lvl : notmtd_lvl;
+		stac_vrefout_set(codec,	spec->gpio_led, spec->vref_led);
+	}
 	return 0;
 }
-#endif
 
-static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state)
+/*
+ * use the power check to control the mute LED on HP notebooks
+ */
+static int stac92xx_check_power_status(struct hda_codec *codec,
+					      hda_nid_t nid)
 {
-	stac92xx_shutup(codec);
+	stac92xx_update_led_status(codec);
+
 	return 0;
 }
-#endif
+#endif /* CONFIG_SND_HDA_POWER_SAVE */
+#endif /* CONFIG_PM */
 
 static const struct hda_codec_ops stac92xx_patch_ops = {
 	.build_controls = stac92xx_build_controls,
@@ -4961,7 +5138,7 @@
 	.init = stac92xx_init,
 	.free = stac92xx_free,
 	.unsol_event = stac92xx_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 	.suspend = stac92xx_suspend,
 	.resume = stac92xx_resume,
 #endif
@@ -5477,12 +5654,19 @@
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	if (spec->gpio_led) {
-		spec->gpio_mask |= spec->gpio_led;
-		spec->gpio_dir |= spec->gpio_led;
-		spec->gpio_data |= spec->gpio_led;
-		/* register check_power_status callback. */
+		if (spec->gpio_led <= 8) {
+			spec->gpio_mask |= spec->gpio_led;
+			spec->gpio_dir |= spec->gpio_led;
+			spec->gpio_data |= spec->gpio_led;
+		} else {
+			codec->patch_ops.set_power_state =
+					stac92xx_set_power_state;
+			codec->patch_ops.post_suspend =
+					stac92xx_post_suspend;
+		}
+		codec->patch_ops.pre_resume = stac92xx_pre_resume;
 		codec->patch_ops.check_power_status =
-			stac92xx_hp_check_power_status;
+			stac92xx_check_power_status;
 	}
 #endif	
 
@@ -5805,12 +5989,19 @@
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	if (spec->gpio_led) {
-		spec->gpio_mask |= spec->gpio_led;
-		spec->gpio_dir |= spec->gpio_led;
-		spec->gpio_data |= spec->gpio_led;
-		/* register check_power_status callback. */
+		if (spec->gpio_led <= 8) {
+			spec->gpio_mask |= spec->gpio_led;
+			spec->gpio_dir |= spec->gpio_led;
+			spec->gpio_data |= spec->gpio_led;
+		} else {
+			codec->patch_ops.set_power_state =
+					stac92xx_set_power_state;
+			codec->patch_ops.post_suspend =
+					stac92xx_post_suspend;
+		}
+		codec->patch_ops.pre_resume = stac92xx_pre_resume;
 		codec->patch_ops.check_power_status =
-			stac92xx_hp_check_power_status;
+			stac92xx_check_power_status;
 	}
 #endif	
 
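patch_sigmatel.c now encodes two LED-control schemes in one field: gpio_led values up to 8 keep the historical meaning of a GPIO bitmask, while larger values are taken as the NID of a pin whose VREF level (HIZ vs. GRD) drives the mute LED — which is also why such codecs must hold the AFG out of D3 except across a full system suspend. The dispatch, sketched with hypothetical helpers and a simplified polarity rule:

static void gpio_led_set(unsigned int bits)	   { (void)bits; }
static void vref_led_set(unsigned int nid, int on) { (void)nid; (void)on; }

static void sync_mute_led(unsigned int gpio_led, int muted, int polarity)
{
	int lit = polarity ? !muted : muted;	/* polarity picks the level */

	if (!gpio_led)
		return;
	if (gpio_led <= 8)
		gpio_led_set(lit ? gpio_led : 0);	/* GPIO bitmask */
	else
		vref_led_set(gpio_led, lit);		/* pin NID, VREF level */
}
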
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index f38160b..84d8798 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -1708,7 +1708,7 @@
 		via_gpio_control(codec);
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int via_suspend(struct hda_codec *codec, pm_message_t state)
 {
 	struct via_spec *spec = codec->spec;
@@ -1736,7 +1736,7 @@
 	.init = via_init,
 	.free = via_free,
 	.unsol_event = via_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 	.suspend = via_suspend,
 #endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index ff29380..76258f2 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -907,6 +907,7 @@
 				struct regulator_init_data *init_data,
 				int voltage)
 {
+	dev_err(codec->dev, "this setup needs regulator support in the kernel\n");
 	return -EINVAL;
 }
 
@@ -1218,6 +1219,34 @@
 	return 0;
 }
 
+static int sgtl5000_replace_vddd_with_ldo(struct snd_soc_codec *codec)
+{
+	struct sgtl5000_priv *sgtl5000 = snd_soc_codec_get_drvdata(codec);
+	int ret;
+
+	/* set internal ldo to 1.2v */
+	ret = ldo_regulator_register(codec, &ldo_init_data, LDO_VOLTAGE);
+	if (ret) {
+		dev_err(codec->dev,
+			"Failed to register vddd internal supplies: %d\n", ret);
+		return ret;
+	}
+
+	sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
+
+	ret = regulator_bulk_get(codec->dev, ARRAY_SIZE(sgtl5000->supplies),
+			sgtl5000->supplies);
+
+	if (ret) {
+		ldo_regulator_remove(codec);
+		dev_err(codec->dev, "Failed to request supplies: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(codec->dev, "Using internal LDO instead of VDDD\n");
+	return 0;
+}
+
 static int sgtl5000_enable_regulators(struct snd_soc_codec *codec)
 {
 	u16 reg;
@@ -1235,30 +1264,9 @@
 	if (!ret)
 		external_vddd = 1;
 	else {
-		/* set internal ldo to 1.2v */
-		int voltage = LDO_VOLTAGE;
-
-		ret = ldo_regulator_register(codec, &ldo_init_data, voltage);
-		if (ret) {
-			dev_err(codec->dev,
-			"Failed to register vddd internal supplies: %d\n",
-				ret);
+		ret = sgtl5000_replace_vddd_with_ldo(codec);
+		if (ret)
 			return ret;
-		}
-
-		sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
-
-		ret = regulator_bulk_get(codec->dev,
-				ARRAY_SIZE(sgtl5000->supplies),
-				sgtl5000->supplies);
-
-		if (ret) {
-			ldo_regulator_remove(codec);
-			dev_err(codec->dev,
-				"Failed to request supplies: %d\n", ret);
-
-			return ret;
-		}
 	}
 
 	ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies),
@@ -1287,7 +1295,6 @@
 	 * roll back to use internal LDO
 	 */
 	if (external_vddd && rev >= 0x11) {
-		int voltage = LDO_VOLTAGE;
 		/* disable all regulator first */
 		regulator_bulk_disable(ARRAY_SIZE(sgtl5000->supplies),
 					sgtl5000->supplies);
@@ -1295,23 +1302,10 @@
 		regulator_bulk_free(ARRAY_SIZE(sgtl5000->supplies),
 					sgtl5000->supplies);
 
-		ret = ldo_regulator_register(codec, &ldo_init_data, voltage);
+		ret = sgtl5000_replace_vddd_with_ldo(codec);
 		if (ret)
 			return ret;
 
-		sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
-
-		ret = regulator_bulk_get(codec->dev,
-				ARRAY_SIZE(sgtl5000->supplies),
-				sgtl5000->supplies);
-		if (ret) {
-			ldo_regulator_remove(codec);
-			dev_err(codec->dev,
-				"Failed to request supplies: %d\n", ret);
-
-			return ret;
-		}
-
 		ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies),
 						sgtl5000->supplies);
 		if (ret)
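The sgtl5000.c change deduplicates the internal-LDO fallback into sgtl5000_replace_vddd_with_ldo(); note how the helper unwinds its first acquisition when the second one fails, so the caller always sees all-or-nothing. The shape of that pattern, with placeholder resources instead of the regulator API:

static int acquire_a(void)	{ return 0; }	/* e.g. register the LDO */
static void release_a(void)	{ }
static int acquire_b(void)	{ return 0; }	/* e.g. get the supplies */

static int acquire_both(void)
{
	int err = acquire_a();

	if (err)
		return err;
	err = acquire_b();
	if (err)
		release_a();	/* unwind on partial failure */
	return err;
}
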
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 8499c56..60d740e 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -3409,6 +3409,9 @@
 	active = snd_soc_read(codec, WM8962_INTERRUPT_STATUS_2);
 	active &= ~mask;
 
+	/* Acknowledge the interrupts */
+	snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active);
+
 	if (active & WM8962_FLL_LOCK_EINT) {
 		dev_dbg(codec->dev, "FLL locked\n");
 		complete(&wm8962->fll_lock);
@@ -3433,9 +3436,6 @@
 				      msecs_to_jiffies(250));
 	}
 
-	/* Acknowledge the interrupts */
-	snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active);
-
 	return IRQ_HANDLED;
 }
 
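The wm8962.c hunk moves the interrupt acknowledge ahead of the event handling: clearing the status bits first means a condition that re-asserts while the (potentially slow) handlers run raises a fresh interrupt instead of being acked away unseen. In outline, with stand-in register helpers:

static unsigned int read_status(void)		{ return 0; }
static void write_status(unsigned int bits)	{ (void)bits; }
static void handle_events(unsigned int bits)	{ (void)bits; }

static void irq_handler(void)
{
	unsigned int active = read_status();

	write_status(active);	/* ack first ... */
	handle_events(active);	/* ... then act, possibly slowly */
}
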
diff --git a/sound/soc/davinci/davinci-vcif.c b/sound/soc/davinci/davinci-vcif.c
index 9259f1f..1f11525 100644
--- a/sound/soc/davinci/davinci-vcif.c
+++ b/sound/soc/davinci/davinci-vcif.c
@@ -62,9 +62,9 @@
 	w = readl(davinci_vc->base + DAVINCI_VC_CTRL);
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1);
+		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0);
 	else
-		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1);
+		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0);
 
 	writel(w, davinci_vc->base + DAVINCI_VC_CTRL);
 }
@@ -80,9 +80,9 @@
 	/* Reset transmitter/receiver and sample rate/frame sync generators */
 	w = readl(davinci_vc->base + DAVINCI_VC_CTRL);
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0);
+		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1);
 	else
-		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0);
+		MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1);
 
 	writel(w, davinci_vc->base + DAVINCI_VC_CTRL);
 }
@@ -159,6 +159,7 @@
 	case SNDRV_PCM_TRIGGER_RESUME:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
 		davinci_vcif_start(substream);
+		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
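The davinci-vcif.c trigger fix adds a missing break: previously a START trigger fell straight through into the STOP cases and immediately undid itself. The same hunk series also swaps the RSTDAC/RSTADC bit values, apparently so that start takes the DAC/ADC out of reset (bit cleared) and stop asserts reset, matching the "Reset transmitter/receiver" comment. The fall-through bug, reduced to a sketch with a hypothetical trigger enum:

enum trig { TRIG_START, TRIG_STOP };

static void start(void)	{ }
static void stop(void)	{ }

static void trigger(enum trig t)
{
	switch (t) {
	case TRIG_START:
		start();
		break;		/* the fix: don't fall through to stop() */
	case TRIG_STOP:
		stop();
		break;
	}
}
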
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 1568eea..c086b78 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -21,6 +21,7 @@
 #include <plat/audio.h>
 
 #include "dma.h"
+#include "idma.h"
 #include "i2s.h"
 #include "i2s-regs.h"
 
@@ -60,6 +61,7 @@
 	/* DMA parameters */
 	struct s3c_dma_params dma_playback;
 	struct s3c_dma_params dma_capture;
+	struct s3c_dma_params idma_playback;
 	u32	quirks;
 	u32	suspend_i2smod;
 	u32	suspend_i2scon;
@@ -877,6 +879,10 @@
 	if (i2s->quirks & QUIRK_NEED_RSTCLR)
 		writel(CON_RSTCLR, i2s->addr + I2SCON);
 
+	if (i2s->quirks & QUIRK_SEC_DAI)
+		idma_reg_addr_init((void *)i2s->addr,
+					i2s->sec_dai->idma_playback.dma_addr);
+
 probe_exit:
 	/* Reset any constraint on RFS and BFS */
 	i2s->rfs = 0;
@@ -1077,6 +1083,7 @@
 		sec_dai->dma_playback.dma_size = 4;
 		sec_dai->base = regs_base;
 		sec_dai->quirks = quirks;
+		sec_dai->idma_playback.dma_addr = i2s_cfg->idma_addr;
 		sec_dai->pri_dai = pri_dai;
 		pri_dai->sec_dai = sec_dai;
 	}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index e44267f..83ad8ca 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -577,6 +577,7 @@
 			case SND_SOC_BIAS_OFF:
 				codec->driver->suspend(codec, PMSG_SUSPEND);
 				codec->suspended = 1;
+				codec->cache_sync = 1;
 				break;
 			default:
 				dev_dbg(codec->dev, "CODEC is on over suspend\n");
@@ -1140,7 +1141,7 @@
 			}
 		}
 		cpu_dai->probed = 1;
-		/* mark cpu_dai as probed and add to card cpu_dai list */
+		/* mark cpu_dai as probed and add to card dai list */
 		list_add(&cpu_dai->card_list, &card->dai_dev_list);
 	}
 
@@ -1171,7 +1172,7 @@
 			}
 		}
 
-		/* mark cpu_dai as probed and add to card cpu_dai list */
+		/* mark codec_dai as probed and add to card dai list */
 		codec_dai->probed = 1;
 		list_add(&codec_dai->card_list, &card->dai_dev_list);
 	}
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index fbfcda0..7e15914 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -124,6 +124,36 @@
 	return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL);
 }
 
+/* get snd_card from DAPM context */
+static inline struct snd_card *dapm_get_snd_card(
+	struct snd_soc_dapm_context *dapm)
+{
+	if (dapm->codec)
+		return dapm->codec->card->snd_card;
+	else if (dapm->platform)
+		return dapm->platform->card->snd_card;
+	else
+		BUG();
+
+	/* unreachable */
+	return NULL;
+}
+
+/* get soc_card from DAPM context */
+static inline struct snd_soc_card *dapm_get_soc_card(
+		struct snd_soc_dapm_context *dapm)
+{
+	if (dapm->codec)
+		return dapm->codec->card;
+	else if (dapm->platform)
+		return dapm->platform->card;
+	else
+		BUG();
+
+	/* unreachable */
+	return NULL;
+}
+
 static int soc_widget_read(struct snd_soc_dapm_widget *w, int reg)
 {
 	if (w->codec)